diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 2d9cab8..1179695 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -371,6 +371,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ lvj_mapjoin.q, \ mergejoin_3way.q,\ mrr.q,\ + nullscan.q,\ orc_ppd_basic.q,\ orc_merge_diff_fs.q,\ tez_bmj_schema_evolution.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java index fd60fed..8c6bd14 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java @@ -18,19 +18,20 @@ package org.apache.hadoop.hive.ql.io; -import java.io.DataInput; -import java.io.DataOutput; import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobConfigurable; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; /** * NullRowsInputFormat outputs null rows, maximum 100. @@ -41,28 +42,15 @@ static final int MAX_ROW = 100; // to prevent infinite loop static final Log LOG = LogFactory.getLog(NullRowsRecordReader.class.getName()); - public static class DummyInputSplit implements InputSplit { + public static class DummyInputSplit extends FileSplit { + @SuppressWarnings("unused") // Serialization ctor. public DummyInputSplit() { + super(); } - @Override - public long getLength() throws IOException { - return 1; - } - - @Override - public String[] getLocations() throws IOException { - return new String[0]; - } - - @Override - public void readFields(DataInput arg0) throws IOException { + public DummyInputSplit(String path) { + super(new Path(path, "null"), 0, 1, (String[]) null); } - - @Override - public void write(DataOutput arg0) throws IOException { - } - } public static class NullRowsRecordReader implements RecordReader { @@ -71,6 +59,7 @@ public void write(DataOutput arg0) throws IOException { public NullRowsRecordReader() { } + @Override public void close() throws IOException { } @@ -111,11 +100,17 @@ public boolean next(NullWritable arg0, NullWritable arg1) throws IOException { } @Override - public InputSplit[] getSplits(JobConf arg0, int arg1) throws IOException { - InputSplit[] ret = new InputSplit[1]; - ret[0] = new DummyInputSplit(); - LOG.info("Calculating splits"); - return ret; + public InputSplit[] getSplits(JobConf conf, int arg1) throws IOException { + // It's important to read the correct nulls! (in truth, the path is needed for SplitGrouper). + String[] paths = conf.getTrimmedStrings(FileInputFormat.INPUT_DIR, (String[]) null); + if (paths == null) { + throw new IOException("Cannot find path in conf"); + } + InputSplit[] result = new InputSplit[paths.length]; + for (int i = 0; i < paths.length; ++i) { + result[i] = new DummyInputSplit(paths[i]); + } + return result; } @Override diff --git a/ql/src/test/queries/clientpositive/nullscan.q b/ql/src/test/queries/clientpositive/nullscan.q new file mode 100644 index 0000000..cc65964 --- /dev/null +++ b/ql/src/test/queries/clientpositive/nullscan.q @@ -0,0 +1,29 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=false; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.auto.convert.join=false; + +set hive.vectorized.execution.enabled=true; + +drop table if exists src_orc; + +create table src_orc stored as orc as select * from srcpart limit 10; + +explain extended +select * from src_orc where 1=2; +select * from src_orc where 1=2; + +explain +select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key; +select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key; + +explain +select count(key) from src_orc where false union all select count(key) from src_orc ; +select count(key) from src_orc where false union all select count(key) from src_orc ; + +explain +select * from src_orc s1, src_orc s2 where false and s1.value = s2.value; +select * from src_orc s1, src_orc s2 where false and s1.value = s2.value; + +drop table if exists src_orc; diff --git a/ql/src/test/results/clientpositive/nullscan.q.out b/ql/src/test/results/clientpositive/nullscan.q.out new file mode 100644 index 0000000..fbfdc69 --- /dev/null +++ b/ql/src/test/results/clientpositive/nullscan.q.out @@ -0,0 +1,449 @@ +PREHOOK: query: drop table if exists src_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists src_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_orc stored as orc as select * from srcpart limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc +POSTHOOK: query: create table src_orc stored as orc as select * from srcpart limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc +PREHOOK: query: explain extended +select * from src_orc where 1=2 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from src_orc where 1=2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + 1 + 2 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + -mr-10002default.src_orc{} [src_orc] + Path -> Partition: + -mr-10002default.src_orc{} + Partition + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,ds,hr + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.src_orc + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct src_orc { string key, string value, string ds, string hr} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + totalSize 629 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,ds,hr + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.src_orc + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct src_orc { string key, string value, string ds, string hr} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 629 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.src_orc + name: default.src_orc + Truncated Path -> Alias: + -mr-10002default.src_orc{} [src_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src_orc where 1=2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from src_orc where 1=2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +PREHOOK: query: explain +select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 6 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 6 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +PREHOOK: query: explain +select count(key) from src_orc where false union all select count(key) from src_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(key) from src_orc where false union all select count(key) from src_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 6 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 6 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 6 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(key) from src_orc where false union all select count(key) from src_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### +POSTHOOK: query: select count(key) from src_orc where false union all select count(key) from src_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +0 +10 +PREHOOK: query: explain +select * from src_orc s1, src_orc s2 where false and s1.value = s2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from src_orc s1, src_orc s2 where false and s1.value = s2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), ds (type: string), hr (type: string) + TableScan + alias: s2 + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 1 Data size: 629 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), ds (type: string), hr (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 691 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 691 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 691 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src_orc s1, src_orc s2 where false and s1.value = s2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from src_orc s1, src_orc s2 where false and s1.value = s2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +PREHOOK: query: drop table if exists src_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_orc +PREHOOK: Output: default@src_orc +POSTHOOK: query: drop table if exists src_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_orc +POSTHOOK: Output: default@src_orc diff --git a/ql/src/test/results/clientpositive/tez/nullscan.q.out b/ql/src/test/results/clientpositive/tez/nullscan.q.out new file mode 100644 index 0000000..990550a --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/nullscan.q.out @@ -0,0 +1,445 @@ +PREHOOK: query: drop table if exists src_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists src_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table src_orc stored as orc as select * from srcpart limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc +POSTHOOK: query: create table src_orc stored as orc as select * from srcpart limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc +PREHOOK: query: explain extended +select * from src_orc where 1=2 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from src_orc where 1=2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src_orc + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + 1 + 2 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: + -mr-10002default.src_orc{} [src_orc] + Path -> Partition: + -mr-10002default.src_orc{} + Partition + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,ds,hr + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.src_orc + numFiles 1 + numRows 10 + rawDataSize 3560 + serialization.ddl struct src_orc { string key, string value, string ds, string hr} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + totalSize 629 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,ds,hr + columns.comments + columns.types string:string:string:string +#### A masked pattern was here #### + name default.src_orc + numFiles 1 + numRows 10 + rawDataSize 3560 + serialization.ddl struct src_orc { string key, string value, string ds, string hr} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 629 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.src_orc + name: default.src_orc + Truncated Path -> Alias: + -mr-10002default.src_orc{} [src_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src_orc where 1=2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from src_orc where 1=2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +PREHOOK: query: explain +select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 391 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 391 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from (select key from src_orc where false) a left outer join (select key from src_orc limit 0) b on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +PREHOOK: query: explain +select count(key) from src_orc where false union all select count(key) from src_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(key) from src_orc where false union all select count(key) from src_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(key) from src_orc where false union all select count(key) from src_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### +POSTHOOK: query: select count(key) from src_orc where false union all select count(key) from src_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +0 +10 +PREHOOK: query: explain +select * from src_orc s1, src_orc s2 where false and s1.value = s2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from src_orc s1, src_orc s2 where false and s1.value = s2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), ds (type: string), hr (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 10 Data size: 3560 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), ds (type: string), hr (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 value (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 391 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 391 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 391 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src_orc s1, src_orc s2 where false and s1.value = s2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from src_orc s1, src_orc s2 where false and s1.value = s2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc +#### A masked pattern was here #### +PREHOOK: query: drop table if exists src_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_orc +PREHOOK: Output: default@src_orc +POSTHOOK: query: drop table if exists src_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_orc +POSTHOOK: Output: default@src_orc