diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 15e0db4..cb00ea1 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -37,6 +37,14 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\ minitez.query.files.shared=delete_orig_table.q,\ orc_merge12.q,\ orc_vectorization_ppd.q,\ + script_env_var1.q,\ + script_env_var2.q,\ + script_pipe.q,\ + scriptfile1.q,\ + transform1.q,\ + transform2.q,\ + transform_ppr1.q,\ + transform_ppr2.q,\ unionDistinct_2.q,\ update_orig_table.q,\ vector_join_part_col_char.q,\ diff --git a/ql/src/test/results/clientpositive/tez/script_env_var1.q.out b/ql/src/test/results/clientpositive/tez/script_env_var1.q.out new file mode 100644 index 0000000..cd39eb8 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/script_env_var1.q.out @@ -0,0 +1,18 @@ +PREHOOK: query: -- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM +( SELECT * FROM (SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'sh' AS key FROM src order by key LIMIT 1)x UNION ALL + SELECT * FROM (SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'sh' AS key FROM src order by key LIMIT 1)y ) a GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM +( SELECT * FROM (SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'sh' AS key FROM src order by key LIMIT 1)x UNION ALL + SELECT * FROM (SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'sh' AS key FROM src order by key LIMIT 1)y ) a GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +1 diff --git a/ql/src/test/results/clientpositive/tez/script_env_var2.q.out b/ql/src/test/results/clientpositive/tez/script_env_var2.q.out new file mode 100644 index 0000000..c3bb990 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/script_env_var2.q.out @@ -0,0 +1,16 @@ +PREHOOK: query: -- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM +( SELECT * FROM (SELECT TRANSFORM('echo $MY_ID') USING 'sh' AS key FROM src LIMIT 1)a UNION ALL + SELECT * FROM (SELECT TRANSFORM('echo $MY_ID') USING 'sh' AS key FROM src LIMIT 1)b ) a GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM +( SELECT * FROM (SELECT TRANSFORM('echo $MY_ID') USING 'sh' AS key FROM src LIMIT 1)a UNION ALL + SELECT * FROM (SELECT TRANSFORM('echo $MY_ID') USING 'sh' AS key FROM src LIMIT 1)b ) a GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +1 diff --git a/ql/src/test/results/clientpositive/tez/script_pipe.q.out b/ql/src/test/results/clientpositive/tez/script_pipe.q.out new file mode 100644 index 0000000..77ddb1b --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/script_pipe.q.out @@ -0,0 +1,127 @@ +PREHOOK: query: -- Tests exception in ScriptOperator.close() by passing to the operator a small amount of data +EXPLAIN SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +PREHOOK: type: QUERY +POSTHOOK: 
query: -- Tests exception in ScriptOperator.close() by passing to the operator a small amount of data +EXPLAIN SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Transform Operator + command: true + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- Tests exception in ScriptOperator.processOp() by passing extra data needed to fill pipe buffer +EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests exception in ScriptOperator.processOp() by passing extra data needed to fill pipe buffer +EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 500 Data size: 534000 Basic 
stats: COMPLETE Column stats: COMPLETE + Transform Operator + command: head -n 1 + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 534000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 534000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 238 val_238 diff --git a/ql/src/test/results/clientpositive/tez/scriptfile1.q.out b/ql/src/test/results/clientpositive/tez/scriptfile1.q.out new file mode 100644 index 0000000..bf202f9 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/scriptfile1.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- EXCLUDE_OS_WINDOWS + +-- NO_SESSION_REUSE + +CREATE TABLE dest1(key INT, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- EXCLUDE_OS_WINDOWS + +-- NO_SESSION_REUSE + +CREATE TABLE dest1(key INT, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: FROM ( + FROM src + SELECT TRANSFORM(src.key, src.value) + USING 'testgrep' AS (tkey, tvalue) + CLUSTER BY tkey +) tmap +INSERT OVERWRITE TABLE dest1 SELECT tmap.tkey, tmap.tvalue +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM ( + FROM src + SELECT TRANSFORM(src.key, src.value) + USING 'testgrep' AS (tkey, tvalue) + CLUSTER BY tkey +) tmap +INSERT OVERWRITE TABLE dest1 SELECT tmap.tkey, tmap.tvalue +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was 
here #### +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +310 val_310 diff --git a/ql/src/test/results/clientpositive/tez/transform1.q.out b/ql/src/test/results/clientpositive/tez/transform1.q.out new file mode 100644 index 0000000..87e4482 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/transform1.q.out @@ -0,0 +1,138 @@ +PREHOOK: query: create table transform1_t1(a string, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@transform1_t1 +POSTHOOK: query: create table transform1_t1(a string, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@transform1_t1 +PREHOOK: query: EXPLAIN +SELECT transform(*) USING 'cat' AS (col array<string>) FROM transform1_t1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT transform(*) USING 'cat' AS (col array<string>) FROM transform1_t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: transform1_t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT transform(*) USING 'cat' AS (col array<string>) FROM transform1_t1 +PREHOOK: type: QUERY +PREHOOK: Input: cat +PREHOOK: Input: default@transform1_t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT transform(*) USING 'cat' AS (col array<string>) FROM transform1_t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: cat +POSTHOOK: Input: default@transform1_t1 +#### A masked pattern was here #### +PREHOOK: query: create table transform1_t2(col array<int>) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@transform1_t2 +POSTHOOK: query: create table transform1_t2(col array<int>) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@transform1_t2 +PREHOOK: query: insert overwrite table transform1_t2 +select array(1,2,3) from src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@transform1_t2 +POSTHOOK: query: insert overwrite table transform1_t2 +select array(1,2,3) from src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@transform1_t2 +POSTHOOK: Lineage: transform1_t2.col EXPRESSION [] +PREHOOK: query: EXPLAIN +SELECT transform('0\0021\0022') USING 'cat' AS (col array<int>) FROM transform1_t2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT transform('0\0021\0022') USING
'cat' AS (col array<int>) FROM transform1_t2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: transform1_t2 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '012' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT transform('0\0021\0022') USING 'cat' AS (col array<int>) FROM transform1_t2 +PREHOOK: type: QUERY +PREHOOK: Input: cat +PREHOOK: Input: default@transform1_t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT transform('0\0021\0022') USING 'cat' AS (col array<int>) FROM transform1_t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: cat +POSTHOOK: Input: default@transform1_t2 +#### A masked pattern was here #### +[0,1,2] diff --git a/ql/src/test/results/clientpositive/tez/transform2.q.out b/ql/src/test/results/clientpositive/tez/transform2.q.out new file mode 100644 index 0000000..28d098d --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/transform2.q.out @@ -0,0 +1,11 @@ +PREHOOK: query: -- Transform with a function that has many parameters +SELECT TRANSFORM(substr(key, 1, 2)) USING 'cat' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Transform with a function that has many parameters +SELECT TRANSFORM(substr(key, 1, 2)) USING 'cat' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +23 NULL diff --git a/ql/src/test/results/clientpositive/tez/transform_ppr1.q.out b/ql/src/test/results/clientpositive/tez/transform_ppr1.q.out new file mode 100644 index 0000000..338a485 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/transform_ppr1.q.out @@ -0,0 +1,496 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN EXTENDED +FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN EXTENDED +FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked
pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Select Operator + expressions: ds (type: string), key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: PARTIAL + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string,string,string + field.delim 9 + serialization.format 9 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + isSamplingPred: false + predicate: ((_col1 < 100) and (_col0 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 333 Data size: 2664 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 333 Data size: 2664 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: '2008-04-08' (type: string), _col1 (type: string), _col2 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + 
bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [src] + /srcpart/ds=2008-04-08/hr=12 [src] + /srcpart/ds=2008-04-09/hr=11 [src] + /srcpart/ds=2008-04-09/hr=12 [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 333 Data size: 2664 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 2664 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: cat +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: cat +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +10 val_10 +11 val_11 +11 val_11 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +17 val_17 +17 val_17 +18 val_18 +18 val_18 +18 val_18 +18 
val_18 +19 val_19 +19 val_19 +2 val_2 +2 val_2 +20 val_20 +20 val_20 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +27 val_27 +27 val_27 +28 val_28 +28 val_28 +30 val_30 +30 val_30 +33 val_33 +33 val_33 +34 val_34 +34 val_34 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +4 val_4 +4 val_4 +41 val_41 +41 val_41 +42 val_42 +42 val_42 +42 val_42 +42 val_42 +43 val_43 +43 val_43 +44 val_44 +44 val_44 +47 val_47 +47 val_47 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +51 val_51 +51 val_51 +53 val_53 +53 val_53 +54 val_54 +54 val_54 +57 val_57 +57 val_57 +58 val_58 +58 val_58 +58 val_58 +58 val_58 +64 val_64 +64 val_64 +65 val_65 +65 val_65 +66 val_66 +66 val_66 +67 val_67 +67 val_67 +67 val_67 +67 val_67 +69 val_69 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +72 val_72 +72 val_72 +74 val_74 +74 val_74 +76 val_76 +76 val_76 +76 val_76 +76 val_76 +77 val_77 +77 val_77 +78 val_78 +78 val_78 +8 val_8 +8 val_8 +80 val_80 +80 val_80 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +9 val_9 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +98 val_98 +98 val_98 diff --git a/ql/src/test/results/clientpositive/tez/transform_ppr2.q.out b/ql/src/test/results/clientpositive/tez/transform_ppr2.q.out new file mode 100644 index 0000000..a3e63ab --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/transform_ppr2.q.out @@ -0,0 +1,402 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN EXTENDED +FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + WHERE src.ds = '2008-04-08' + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN EXTENDED +FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + WHERE src.ds = '2008-04-08' + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: '2008-04-08' (type: string), key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string,string,string + field.delim 9 + serialization.format 9 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num 
rows: 1000 Data size: 272000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: (_col1 < 100) (type: boolean) + Statistics: Num rows: 333 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 333 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + 
partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [src] + /srcpart/ds=2008-04-08/hr=12 [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 333 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + WHERE src.ds = '2008-04-08' + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 +PREHOOK: type: QUERY +PREHOOK: Input: cat +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + WHERE src.ds = '2008-04-08' + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: cat +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +10 val_10 +11 val_11 +11 val_11 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +17 val_17 +17 val_17 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +19 val_19 +19 val_19 +2 val_2 +2 val_2 +20 val_20 +20 val_20 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +27 val_27 +27 val_27 +28 val_28 +28 val_28 +30 val_30 +30 val_30 +33 val_33 +33 val_33 +34 val_34 +34 val_34 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +4 val_4 +4 val_4 +41 val_41 +41 val_41 +42 val_42 +42 val_42 +42 val_42 +42 val_42 +43 val_43 +43 val_43 +44 val_44 +44 val_44 +47 val_47 +47 val_47 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +51 val_51 +51 val_51 +53 val_53 +53 val_53 +54 val_54 +54 val_54 +57 val_57 +57 val_57 +58 val_58 +58 val_58 +58 val_58 +58 val_58 +64 val_64 +64 val_64 +65 val_65 +65 val_65 +66 val_66 +66 val_66 +67 val_67 
+67 val_67 +67 val_67 +67 val_67 +69 val_69 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +72 val_72 +72 val_72 +74 val_74 +74 val_74 +76 val_76 +76 val_76 +76 val_76 +76 val_76 +77 val_77 +77 val_77 +78 val_78 +78 val_78 +8 val_8 +8 val_8 +80 val_80 +80 val_80 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +9 val_9 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +98 val_98 +98 val_98