diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index a0829a1..570a27f 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -389,8 +389,12 @@ spark.query.files=alter_merge_orc.q \ sample6.q \ sample7.q \ sample9.q \ + script_env_var1.q \ + script_env_var2.q \ + script_pipe.q \ sort.q \ spark_test.q \ + temp_table.q \ timestamp_1.q \ timestamp_2.q \ timestamp_3.q \ @@ -398,6 +402,10 @@ spark.query.files=alter_merge_orc.q \ timestamp_lazy.q \ timestamp_null.q \ timestamp_udf.q \ + transform_ppr1.q \ + transform_ppr2.q \ + transform1.q \ + transform2.q \ union.q \ union10.q \ union11.q \ diff --git ql/src/test/results/clientpositive/spark/script_env_var1.q.out ql/src/test/results/clientpositive/spark/script_env_var1.q.out new file mode 100644 index 0000000..8e1075a --- /dev/null +++ ql/src/test/results/clientpositive/spark/script_env_var1.q.out @@ -0,0 +1,18 @@ +PREHOOK: query: -- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM +( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'sh' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'sh' AS key FROM src LIMIT 1 ) a GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM +( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'sh' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'sh' AS key FROM src LIMIT 1 ) a GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +1 diff --git ql/src/test/results/clientpositive/spark/script_env_var2.q.out ql/src/test/results/clientpositive/spark/script_env_var2.q.out new file mode 100644 index 0000000..89f3606 --- /dev/null +++ ql/src/test/results/clientpositive/spark/script_env_var2.q.out @@ -0,0 +1,16 @@ +PREHOOK: query: -- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM +( SELECT TRANSFORM('echo $MY_ID') USING 'sh' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $MY_ID') USING 'sh' AS key FROM src LIMIT 1 ) a GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM +( SELECT TRANSFORM('echo $MY_ID') USING 'sh' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $MY_ID') USING 'sh' AS key FROM src LIMIT 1 ) a GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +1 diff --git ql/src/test/results/clientpositive/spark/script_pipe.q.out ql/src/test/results/clientpositive/spark/script_pipe.q.out new file mode 100644 index 0000000..7571009 --- /dev/null +++ ql/src/test/results/clientpositive/spark/script_pipe.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: -- Tests exception in ScriptOperator.close() by passing to the operator a small amount of data +EXPLAIN SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests exception in ScriptOperator.close() by passing to the operator a small amount of data +EXPLAIN 
SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Transform Operator + command: true + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- Tests exception in ScriptOperator.processOp() by passing extra data needed to fill pipe buffer +EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests exception in ScriptOperator.processOp() by passing extra data needed to fill pipe buffer +EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string), key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + 
Transform Operator + command: head -n 1 + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 238 val_238 diff --git ql/src/test/results/clientpositive/spark/temp_table.q.out ql/src/test/results/clientpositive/spark/temp_table.q.out new file mode 100644 index 0000000..fe71014 --- /dev/null +++ ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -0,0 +1,417 @@ +PREHOOK: query: EXPLAIN CREATE TEMPORARY TABLE foo AS SELECT * FROM src WHERE key % 2 = 0 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: EXPLAIN CREATE TEMPORARY TABLE foo AS SELECT * FROM src WHERE key % 2 = 0 +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key % 2) = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.foo + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + input format: org.apache.hadoop.mapred.TextInputFormat +#### A masked pattern was here #### + output format: 
org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: foo + isTemporary: true + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: CREATE TEMPORARY TABLE foo AS SELECT * FROM src WHERE key % 2 = 0 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TEMPORARY TABLE foo AS SELECT * FROM src WHERE key % 2 = 0 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@foo +PREHOOK: query: EXPLAIN CREATE TEMPORARY TABLE bar AS SELECT * FROM src WHERE key % 2 = 1 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: EXPLAIN CREATE TEMPORARY TABLE bar AS SELECT * FROM src WHERE key % 2 = 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key % 2) = 1) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bar + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + input format: org.apache.hadoop.mapred.TextInputFormat +#### A masked pattern was here #### + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bar + isTemporary: true + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: CREATE TEMPORARY TABLE bar AS SELECT * FROM src WHERE key % 2 = 1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TEMPORARY TABLE bar AS SELECT * FROM src WHERE key % 2 = 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bar +PREHOOK: query: DESCRIBE foo +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@foo +POSTHOOK: query: DESCRIBE foo +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@foo +key string +value string +PREHOOK: query: DESCRIBE bar +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@bar +POSTHOOK: query: DESCRIBE 
bar +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@bar +key string +value string +PREHOOK: query: explain select * from foo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from foo limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: foo + Statistics: Num rows: 14 Data size: 2856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2856 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from foo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select * from foo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +238 val_238 +86 val_86 +278 val_278 +98 val_98 +484 val_484 +150 val_150 +224 val_224 +66 val_66 +128 val_128 +146 val_146 +PREHOOK: query: explain select * from (select * from foo union all select * from bar) u order by key limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from (select * from foo union all select * from bar) u order by key limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Union 2 (GROUP SORT) + Union 2 <- Map 1 (NONE), Map 4 (NONE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: bar + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: foo + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2070 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2070 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select * from foo union all select * from bar) u order by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@bar +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select * from 
(select * from foo union all select * from bar) u order by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bar +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: CREATE TEMPORARY TABLE baz LIKE foo +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@baz +POSTHOOK: query: CREATE TEMPORARY TABLE baz LIKE foo +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@baz +PREHOOK: query: INSERT OVERWRITE TABLE baz SELECT * from foo +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +PREHOOK: Output: default@baz +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE baz SELECT * from foo +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +POSTHOOK: Output: default@baz +POSTHOOK: Lineage: baz.key SIMPLE [(foo)foo.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: baz.value SIMPLE [(foo)foo.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TEMPORARY TABLE bay (key string, value string) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bay +POSTHOOK: query: CREATE TEMPORARY TABLE bay (key string, value string) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bay +PREHOOK: query: select * from bay +PREHOOK: type: QUERY +PREHOOK: Input: default@bay +#### A masked pattern was here #### +POSTHOOK: query: select * from bay +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bay +#### A masked pattern was here #### +PREHOOK: query: INSERT OVERWRITE TABLE bay SELECT * FROM src ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bay +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE bay SELECT * FROM src ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bay +POSTHOOK: Lineage: bay.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bay.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from bay limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@bay +#### A masked pattern was here #### +POSTHOOK: query: select * from bay limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bay +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: SHOW TABLES +PREHOOK: type: SHOWTABLES +POSTHOOK: query: SHOW TABLES +POSTHOOK: type: SHOWTABLES +alltypesorc +bar +bay +baz +foo +src +src1 +src_json +src_sequencefile +src_thrift +srcbucket +srcbucket2 +srcpart +PREHOOK: query: CREATE DATABASE two +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:two +POSTHOOK: query: CREATE DATABASE two +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:two +PREHOOK: query: USE two +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:two +POSTHOOK: query: USE two +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: 
database:two +PREHOOK: query: SHOW TABLES +PREHOOK: type: SHOWTABLES +POSTHOOK: query: SHOW TABLES +POSTHOOK: type: SHOWTABLES +PREHOOK: query: CREATE TEMPORARY TABLE foo AS SELECT * FROM default.foo +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@foo +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TEMPORARY TABLE foo AS SELECT * FROM default.foo +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@foo +POSTHOOK: Output: two@foo +PREHOOK: query: SHOW TABLES +PREHOOK: type: SHOWTABLES +POSTHOOK: query: SHOW TABLES +POSTHOOK: type: SHOWTABLES +foo +PREHOOK: query: use default +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:default +POSTHOOK: query: use default +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:default +PREHOOK: query: DROP DATABASE two CASCADE +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:two +PREHOOK: Output: database:two +PREHOOK: Output: two@foo +POSTHOOK: query: DROP DATABASE two CASCADE +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:two +POSTHOOK: Output: database:two +POSTHOOK: Output: two@foo +PREHOOK: query: DROP TABLE bay +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@bay +PREHOOK: Output: default@bay +POSTHOOK: query: DROP TABLE bay +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@bay +POSTHOOK: Output: default@bay diff --git ql/src/test/results/clientpositive/spark/transform1.q.out ql/src/test/results/clientpositive/spark/transform1.q.out new file mode 100644 index 0000000..a657d01 --- /dev/null +++ ql/src/test/results/clientpositive/spark/transform1.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: create table transform1_t1(a string, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@transform1_t1 +POSTHOOK: query: create table transform1_t1(a string, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@transform1_t1 +PREHOOK: query: EXPLAIN +SELECT transform(*) USING 'cat' AS (col array<bigint>) FROM transform1_t1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT transform(*) USING 'cat' AS (col array<bigint>) FROM transform1_t1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: transform1_t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT 
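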
transform(*) USING 'cat' AS (col array<bigint>) FROM transform1_t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@transform1_t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT transform(*) USING 'cat' AS (col array<bigint>) FROM transform1_t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@transform1_t1 +#### A masked pattern was here #### +PREHOOK: query: create table transform1_t2(col array<int>) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@transform1_t2 +POSTHOOK: query: create table transform1_t2(col array<int>) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@transform1_t2 +PREHOOK: query: insert overwrite table transform1_t2 +select array(1,2,3) from src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@transform1_t2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table transform1_t2 +select array(1,2,3) from src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@transform1_t2 +POSTHOOK: Lineage: transform1_t2.col EXPRESSION [] +PREHOOK: query: EXPLAIN +SELECT transform('0\0021\0022') USING 'cat' AS (col array<int>) FROM transform1_t2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT transform('0\0021\0022') USING 'cat' AS (col array<int>) FROM transform1_t2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: transform1_t2 + Statistics: Num rows: -1 Data size: 6 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: '012' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: -1 Data size: 6 Basic stats: PARTIAL Column stats: COMPLETE + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: -1 Data size: 6 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: -1 Data size: 6 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT transform('0\0021\0022') USING 'cat' AS (col array<int>) FROM transform1_t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@transform1_t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT transform('0\0021\0022') USING 'cat' AS (col array<int>) FROM transform1_t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@transform1_t2 +#### A masked pattern was here #### +[0,1,2] diff --git ql/src/test/results/clientpositive/spark/transform2.q.out ql/src/test/results/clientpositive/spark/transform2.q.out new file mode 100644 index 0000000..28d098d --- /dev/null +++ ql/src/test/results/clientpositive/spark/transform2.q.out @@ -0,0 +1,11 @@ +PREHOOK: query: -- Transform with a function that has many parameters +SELECT TRANSFORM(substr(key, 1, 2)) USING 'cat' FROM src 
LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Transform with a function that has many parameters +SELECT TRANSFORM(substr(key, 1, 2)) USING 'cat' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +23 NULL diff --git ql/src/test/results/clientpositive/spark/transform_ppr1.q.out ql/src/test/results/clientpositive/spark/transform_ppr1.q.out new file mode 100644 index 0000000..e80a39c --- /dev/null +++ ql/src/test/results/clientpositive/spark/transform_ppr1.q.out @@ -0,0 +1,562 @@ +PREHOOK: query: EXPLAIN EXTENDED +FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + . + TOK_TABLE_OR_COL + src + ds + . + TOK_TABLE_OR_COL + src + key + . + TOK_TABLE_OR_COL + src + value + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_RECORDREADER + TOK_ALIASLIST + ds + tkey + tvalue + TOK_CLUSTERBY + TOK_TABLE_OR_COL + tkey + tmap + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + tkey + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + tvalue + TOK_WHERE + AND + < + . + TOK_TABLE_OR_COL + tmap + tkey + 100 + = + . 
+ TOK_TABLE_OR_COL + tmap + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: ds (type: string), key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string,string,string + field.delim 9 + serialization.format 9 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col1 < 100) and (_col0 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: '2008-04-08' (type: string), _col1 (type: string), _col2 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 
+ properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here 
#### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [src] + /srcpart/ds=2008-04-08/hr=12 [src] + /srcpart/ds=2008-04-09/hr=11 [src] + /srcpart/ds=2008-04-09/hr=12 [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +10 val_10 +11 val_11 +11 val_11 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +17 val_17 +17 val_17 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +19 val_19 +19 val_19 +2 val_2 +2 val_2 +20 val_20 +20 val_20 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +27 val_27 +27 val_27 +28 val_28 +28 val_28 +30 val_30 +30 val_30 +33 
val_33 +33 val_33 +34 val_34 +34 val_34 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +4 val_4 +4 val_4 +41 val_41 +41 val_41 +42 val_42 +42 val_42 +42 val_42 +42 val_42 +43 val_43 +43 val_43 +44 val_44 +44 val_44 +47 val_47 +47 val_47 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +51 val_51 +51 val_51 +53 val_53 +53 val_53 +54 val_54 +54 val_54 +57 val_57 +57 val_57 +58 val_58 +58 val_58 +58 val_58 +58 val_58 +64 val_64 +64 val_64 +65 val_65 +65 val_65 +66 val_66 +66 val_66 +67 val_67 +67 val_67 +67 val_67 +67 val_67 +69 val_69 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +72 val_72 +72 val_72 +74 val_74 +74 val_74 +76 val_76 +76 val_76 +76 val_76 +76 val_76 +77 val_77 +77 val_77 +78 val_78 +78 val_78 +8 val_8 +8 val_8 +80 val_80 +80 val_80 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +9 val_9 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +98 val_98 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/transform_ppr2.q.out ql/src/test/results/clientpositive/spark/transform_ppr2.q.out new file mode 100644 index 0000000..6a27eec --- /dev/null +++ ql/src/test/results/clientpositive/spark/transform_ppr2.q.out @@ -0,0 +1,468 @@ +PREHOOK: query: EXPLAIN EXTENDED +FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + WHERE src.ds = '2008-04-08' + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + WHERE src.ds = '2008-04-08' + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TRANSFORM + TOK_EXPLIST + . + TOK_TABLE_OR_COL + src + ds + . + TOK_TABLE_OR_COL + src + key + . + TOK_TABLE_OR_COL + src + value + TOK_SERDE + TOK_RECORDWRITER + 'cat' + TOK_SERDE + TOK_RECORDREADER + TOK_ALIASLIST + ds + tkey + tvalue + TOK_WHERE + = + . + TOK_TABLE_OR_COL + src + ds + '2008-04-08' + TOK_CLUSTERBY + TOK_TABLE_OR_COL + tkey + tmap + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + tkey + TOK_SELEXPR + . + TOK_TABLE_OR_COL + tmap + tvalue + TOK_WHERE + < + . 
+ TOK_TABLE_OR_COL + tmap + tkey + 100 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: ds (type: string), key (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string,string,string + field.delim 9 + serialization.format 9 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col1 < 100) (type: boolean) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + 
bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [src] + /srcpart/ds=2008-04-08/hr=12 [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 19 Data size: 3807 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + WHERE src.ds = '2008-04-08' + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM ( + FROM srcpart src + SELECT TRANSFORM(src.ds, src.key, src.value) + USING 'cat' AS (ds, tkey, tvalue) + WHERE src.ds = '2008-04-08' + CLUSTER BY tkey +) tmap +SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +10 val_10 +11 val_11 +11 val_11 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +17 val_17 +17 val_17 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +19 val_19 +19 val_19 +2 val_2 
+2 val_2 +20 val_20 +20 val_20 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +27 val_27 +27 val_27 +28 val_28 +28 val_28 +30 val_30 +30 val_30 +33 val_33 +33 val_33 +34 val_34 +34 val_34 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +4 val_4 +4 val_4 +41 val_41 +41 val_41 +42 val_42 +42 val_42 +42 val_42 +42 val_42 +43 val_43 +43 val_43 +44 val_44 +44 val_44 +47 val_47 +47 val_47 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +51 val_51 +51 val_51 +53 val_53 +53 val_53 +54 val_54 +54 val_54 +57 val_57 +57 val_57 +58 val_58 +58 val_58 +58 val_58 +58 val_58 +64 val_64 +64 val_64 +65 val_65 +65 val_65 +66 val_66 +66 val_66 +67 val_67 +67 val_67 +67 val_67 +67 val_67 +69 val_69 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +72 val_72 +72 val_72 +74 val_74 +74 val_74 +76 val_76 +76 val_76 +76 val_76 +76 val_76 +77 val_77 +77 val_77 +78 val_78 +78 val_78 +8 val_8 +8 val_8 +80 val_80 +80 val_80 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +9 val_9 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +98 val_98 +98 val_98