diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index ecb8b74..0cec7a2 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -310,6 +310,16 @@ minimr.query.negative.files=cluster_tasklog_retrieval.q,\ udf_local_resource.q spark.query.files=spark_test.q \ + alter_merge_orc.q, \ + alter_merge_stats_orc.q, \ + bucket2.q, \ + bucket3.q, \ + bucket4.q, \ + count.q, \ + create_merge_compressed.q, \ + ctas.q, \ + custom_input_output_format.q, \ + disable_merge_for_bucketing.q, \ avro_compression_enabled_native.q \ avro_decimal_native.q \ ptf_decimal.q \ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java index d16f1be..c0eb603 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java @@ -29,6 +29,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.Partitioner; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -190,6 +191,12 @@ private JobConf cloneJobConf(BaseWork work) throws Exception { JobConf cloned = new JobConf(jobConf); // Make sure we'll use a different plan path from the original one HiveConf.setVar(cloned, HiveConf.ConfVars.PLAN, ""); + try { + cloned.setPartitionerClass((Class<? extends Partitioner>) (Class.forName(HiveConf.getVar(cloned, + HiveConf.ConfVars.HIVEPARTITIONER)))); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Error initializing job: " + e.getMessage(), e); + } if (work instanceof MapWork) { List<Path> inputPaths = Utilities.getInputPaths(cloned, (MapWork) work, scratchDir, context, false); Utilities.setInputPaths(cloned, inputPaths); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index dc621cf..3f5752c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -250,10 +250,10 @@ protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, Pa } if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { - (new Vectorizer()).resolve(physicalCtx); + physicalCtx = (new Vectorizer()).resolve(physicalCtx); } if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) { - (new StageIDsRearranger()).resolve(physicalCtx); + physicalCtx = (new StageIDsRearranger()).resolve(physicalCtx); } return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java index 026f4e0..16b4460 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java @@ -41,14 +41,16 @@ @Override public boolean connect(Configuration hconf, Task sourceTask) { - try { - jc = new JobClient(toJobConf(hconf)); - RunningJob job = jc.getJob(((MapRedTask)sourceTask).getJobID()); - if (job != null) { - counters = job.getCounters(); + if (sourceTask instanceof MapRedTask) { + try { + jc = new JobClient(toJobConf(hconf)); + RunningJob job = 
jc.getJob(((MapRedTask)sourceTask).getJobID()); + if (job != null) { + counters = job.getCounters(); + } + } catch (Exception e) { + LOG.error("Failed to get Job instance for " + sourceTask.getJobID(),e); } - } catch (Exception e) { - LOG.error("Failed to get Job instance for " + sourceTask.getJobID(),e); } return counters != null; } @@ -59,9 +61,13 @@ private JobConf toJobConf(Configuration hconf) { @Override public String aggregateStats(String counterGrpName, String statType) { - // In case of counters, aggregation is done by JobTracker / MR AM itself - // so no need to aggregate, simply return the counter value for requested stat. - return String.valueOf(counters.getGroup(counterGrpName).getCounter(statType)); + long value = 0; + if (counters != null) { + // In case of counters, aggregation is done by JobTracker / MR AM itself + // so no need to aggregate, simply return the counter value for requested stat. + value = counters.getGroup(counterGrpName).getCounter(statType); + } + return String.valueOf(value); } @Override diff --git a/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out b/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out new file mode 100644 index 0000000..bfa352b --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out @@ -0,0 +1,269 @@ +PREHOOK: query: create table src_orc_merge_test(key int, value string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test +POSTHOOK: query: create table src_orc_merge_test(key int, value string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test +PREHOOK: query: insert overwrite table src_orc_merge_test select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table src_orc_merge_test select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test +POSTHOOK: Lineage: src_orc_merge_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table src_orc_merge_test select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table src_orc_merge_test select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test +POSTHOOK: Lineage: src_orc_merge_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table src_orc_merge_test select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot 
be connected to +POSTHOOK: query: insert into table src_orc_merge_test select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test +POSTHOOK: Lineage: src_orc_merge_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: show table extended like `src_orc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `src_orc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src_orc_merge_test +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +outputformat:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:3 +totalFileSize:7380 +maxFileSize:2460 +minFileSize:2460 +#### A masked pattern was here #### + +PREHOOK: query: select count(1) from src_orc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc_merge_test +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from src_orc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc_merge_test +#### A masked pattern was here #### +1500 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from src_orc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc_merge_test +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from src_orc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc_merge_test +#### A masked pattern was here #### +390273 108631194210 +PREHOOK: query: alter table src_orc_merge_test concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@src_orc_merge_test +PREHOOK: Output: default@src_orc_merge_test +POSTHOOK: query: alter table src_orc_merge_test concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@src_orc_merge_test +POSTHOOK: Output: default@src_orc_merge_test +PREHOOK: query: show table extended like `src_orc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `src_orc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src_orc_merge_test +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +outputformat:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:7059 +maxFileSize:7059 +minFileSize:7059 +#### A masked pattern was here #### + +PREHOOK: query: select count(1) from src_orc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc_merge_test +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from src_orc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc_merge_test +#### A masked pattern was here #### +1500 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from src_orc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc_merge_test +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from src_orc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc_merge_test +#### A masked pattern was here #### +390273 108631194210 +PREHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string) stored as orc +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test_part +PREHOOK: query: alter table src_orc_merge_test_part add partition (ds='2011') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: alter table src_orc_merge_test_part add partition (ds='2011') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2011 +PREHOOK: query: insert overwrite table src_orc_merge_test_part partition (ds='2011') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part@ds=2011 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table src_orc_merge_test_part partition (ds='2011') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2011 +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table src_orc_merge_test_part partition (ds='2011') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part@ds=2011 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table src_orc_merge_test_part partition (ds='2011') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2011 +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table src_orc_merge_test_part partition (ds='2011') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part@ds=2011 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table src_orc_merge_test_part partition (ds='2011') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2011 +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: show table extended like `src_orc_merge_test_part` partition (ds='2011') +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like 
`src_orc_merge_test_part` partition (ds='2011') +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src_orc_merge_test_part +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +outputformat:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +columns:struct columns { i32 key, string value} +partitioned:true +partitionColumns:struct partition_columns { string ds} +totalNumberFiles:3 +totalFileSize:7380 +maxFileSize:2460 +minFileSize:2460 +#### A masked pattern was here #### + +PREHOOK: query: select count(1) from src_orc_merge_test_part +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc_merge_test_part +PREHOOK: Input: default@src_orc_merge_test_part@ds=2011 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from src_orc_merge_test_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: Input: default@src_orc_merge_test_part@ds=2011 +#### A masked pattern was here #### +1500 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc_merge_test_part +PREHOOK: Input: default@src_orc_merge_test_part@ds=2011 +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: Input: default@src_orc_merge_test_part@ds=2011 +#### A masked pattern was here #### +390273 108631194210 +PREHOOK: query: alter table src_orc_merge_test_part partition (ds='2011') concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@src_orc_merge_test_part +PREHOOK: Output: default@src_orc_merge_test_part@ds=2011 +POSTHOOK: query: alter table src_orc_merge_test_part partition (ds='2011') concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2011 +PREHOOK: query: show table extended like `src_orc_merge_test_part` partition (ds='2011') +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `src_orc_merge_test_part` partition (ds='2011') +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src_orc_merge_test_part +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +outputformat:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +columns:struct columns { i32 key, string value} +partitioned:true +partitionColumns:struct partition_columns { string ds} +totalNumberFiles:1 +totalFileSize:7059 +maxFileSize:7059 +minFileSize:7059 +#### A masked pattern was here #### + +PREHOOK: query: select count(1) from src_orc_merge_test_part +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc_merge_test_part +PREHOOK: Input: default@src_orc_merge_test_part@ds=2011 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from src_orc_merge_test_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: Input: default@src_orc_merge_test_part@ds=2011 +#### A masked pattern was here #### +1500 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part +PREHOOK: type: QUERY +PREHOOK: Input: default@src_orc_merge_test_part +PREHOOK: Input: default@src_orc_merge_test_part@ds=2011 +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: Input: 
default@src_orc_merge_test_part@ds=2011 +#### A masked pattern was here #### +390273 108631194210 +PREHOOK: query: drop table src_orc_merge_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_orc_merge_test +PREHOOK: Output: default@src_orc_merge_test +POSTHOOK: query: drop table src_orc_merge_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_orc_merge_test +POSTHOOK: Output: default@src_orc_merge_test +PREHOOK: query: drop table src_orc_merge_test_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_orc_merge_test_part +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: drop table src_orc_merge_test_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part diff --git a/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out b/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out new file mode 100644 index 0000000..9226001 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out @@ -0,0 +1,388 @@ +PREHOOK: query: create table src_orc_merge_test_stat(key int, value string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test_stat +POSTHOOK: query: create table src_orc_merge_test_stat(key int, value string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test_stat +PREHOOK: query: insert overwrite table src_orc_merge_test_stat select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_stat +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table src_orc_merge_test_stat select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_stat +POSTHOOK: Lineage: src_orc_merge_test_stat.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_stat.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table src_orc_merge_test_stat select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_stat +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table src_orc_merge_test_stat select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_stat +POSTHOOK: Lineage: src_orc_merge_test_stat.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_stat.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table src_orc_merge_test_stat select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_stat +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table src_orc_merge_test_stat select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src 
+POSTHOOK: Output: default@src_orc_merge_test_stat +POSTHOOK: Lineage: src_orc_merge_test_stat.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_stat.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: show table extended like `src_orc_merge_test_stat` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `src_orc_merge_test_stat` +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src_orc_merge_test_stat +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +outputformat:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:3 +totalFileSize:7380 +maxFileSize:2460 +minFileSize:2460 +#### A masked pattern was here #### + +PREHOOK: query: desc extended src_orc_merge_test_stat +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_stat +POSTHOOK: query: desc extended src_orc_merge_test_stat +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_stat +key int +value string + +#### A masked pattern was here #### +PREHOOK: query: analyze table src_orc_merge_test_stat compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@src_orc_merge_test_stat +POSTHOOK: query: analyze table src_orc_merge_test_stat compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_orc_merge_test_stat +PREHOOK: query: desc formatted src_orc_merge_test_stat +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_stat +POSTHOOK: query: desc formatted src_orc_merge_test_stat +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_stat +# col_name data_type comment + +key int +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 1500 + rawDataSize 141000 + totalSize 7380 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: alter table src_orc_merge_test_stat concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@src_orc_merge_test_stat +PREHOOK: Output: default@src_orc_merge_test_stat +POSTHOOK: query: alter table src_orc_merge_test_stat concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@src_orc_merge_test_stat +POSTHOOK: Output: default@src_orc_merge_test_stat +PREHOOK: query: analyze table src_orc_merge_test_stat compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@src_orc_merge_test_stat +POSTHOOK: query: analyze table src_orc_merge_test_stat compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_orc_merge_test_stat +PREHOOK: query: desc formatted src_orc_merge_test_stat +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_stat +POSTHOOK: query: desc formatted src_orc_merge_test_stat +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_stat +# col_name data_type comment + +key int +value string + +# 
Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1500 + rawDataSize 141000 + totalSize 7059 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: create table src_orc_merge_test_part_stat(key int, value string) partitioned by (ds string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test_part_stat +POSTHOOK: query: create table src_orc_merge_test_part_stat(key int, value string) partitioned by (ds string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test_part_stat +PREHOOK: query: alter table src_orc_merge_test_part_stat add partition (ds='2011') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@src_orc_merge_test_part_stat +POSTHOOK: query: alter table src_orc_merge_test_part_stat add partition (ds='2011') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@src_orc_merge_test_part_stat +POSTHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +PREHOOK: query: insert overwrite table src_orc_merge_test_part_stat partition (ds='2011') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table src_orc_merge_test_part_stat partition (ds='2011') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +POSTHOOK: Lineage: src_orc_merge_test_part_stat PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part_stat PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table src_orc_merge_test_part_stat partition (ds='2011') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table src_orc_merge_test_part_stat partition (ds='2011') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +POSTHOOK: Lineage: src_orc_merge_test_part_stat PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part_stat PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table src_orc_merge_test_part_stat partition (ds='2011') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: 
Output: default@src_orc_merge_test_part_stat@ds=2011 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table src_orc_merge_test_part_stat partition (ds='2011') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +POSTHOOK: Lineage: src_orc_merge_test_part_stat PARTITION(ds=2011).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part_stat PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: show table extended like `src_orc_merge_test_part_stat` partition (ds='2011') +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `src_orc_merge_test_part_stat` partition (ds='2011') +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src_orc_merge_test_part_stat +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +outputformat:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +columns:struct columns { i32 key, string value} +partitioned:true +partitionColumns:struct partition_columns { string ds} +totalNumberFiles:3 +totalFileSize:7380 +maxFileSize:2460 +minFileSize:2460 +#### A masked pattern was here #### + +PREHOOK: query: desc formatted src_orc_merge_test_part_stat partition (ds='2011') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_part_stat +POSTHOOK: query: desc formatted src_orc_merge_test_part_stat partition (ds='2011') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_part_stat +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2011] +Database: default +Table: src_orc_merge_test_part_stat +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows -1 + rawDataSize -1 + totalSize 7380 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: analyze table src_orc_merge_test_part_stat partition(ds='2011') compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@src_orc_merge_test_part_stat +PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +POSTHOOK: query: analyze table src_orc_merge_test_part_stat partition(ds='2011') compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_orc_merge_test_part_stat +POSTHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +PREHOOK: query: desc formatted src_orc_merge_test_part_stat partition (ds='2011') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_part_stat +POSTHOOK: query: desc formatted src_orc_merge_test_part_stat partition (ds='2011') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_part_stat +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed 
Partition Information +Partition Value: [2011] +Database: default +Table: src_orc_merge_test_part_stat +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 1500 + rawDataSize 141000 + totalSize 7380 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: alter table src_orc_merge_test_part_stat partition (ds='2011') concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@src_orc_merge_test_part_stat +PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +POSTHOOK: query: alter table src_orc_merge_test_part_stat partition (ds='2011') concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@src_orc_merge_test_part_stat +POSTHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +PREHOOK: query: analyze table src_orc_merge_test_part_stat partition(ds='2011') compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@src_orc_merge_test_part_stat +PREHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +POSTHOOK: query: analyze table src_orc_merge_test_part_stat partition(ds='2011') compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_orc_merge_test_part_stat +POSTHOOK: Output: default@src_orc_merge_test_part_stat@ds=2011 +PREHOOK: query: desc formatted src_orc_merge_test_part_stat partition (ds='2011') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_part_stat +POSTHOOK: query: desc formatted src_orc_merge_test_part_stat partition (ds='2011') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_part_stat +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string + +# Detailed Partition Information +Partition Value: [2011] +Database: default +Table: src_orc_merge_test_part_stat +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1500 + rawDataSize 141000 + totalSize 7059 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table src_orc_merge_test_stat +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_orc_merge_test_stat +PREHOOK: Output: default@src_orc_merge_test_stat +POSTHOOK: query: drop table src_orc_merge_test_stat +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_orc_merge_test_stat +POSTHOOK: Output: default@src_orc_merge_test_stat +PREHOOK: query: drop table src_orc_merge_test_part_stat +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_orc_merge_test_part_stat +PREHOOK: Output: default@src_orc_merge_test_part_stat +POSTHOOK: query: drop table src_orc_merge_test_part_stat +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_orc_merge_test_part_stat +POSTHOOK: Output: 
default@src_orc_merge_test_part_stat diff --git a/ql/src/test/results/clientpositive/spark/bucket2.q.out b/ql/src/test/results/clientpositive/spark/bucket2.q.out new file mode 100644 index 0000000..970628e --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/bucket2.q.out @@ -0,0 +1,477 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket2_1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket2_1 +PREHOOK: query: explain extended +insert overwrite table bucket2_1 +select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucket2_1 +select * from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket2_1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: 
default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket2_1 + serialization.ddl struct bucket2_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket2_1 + TotalFiles: 2 + GatherStats: true + MultiFileSpray: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket2_1 + serialization.ddl struct bucket2_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket2_1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucket2_1 +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucket2_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucket2_1 +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucket2_1 +POSTHOOK: Lineage: bucket2_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucket2_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select * from bucket2_1 tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from bucket2_1 tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: s + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from bucket2_1 tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_1 tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +104 val_104 +104 val_104 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +134 val_134 +134 val_134 +136 val_136 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +146 val_146 +146 val_146 +150 val_150 +152 val_152 +152 val_152 +156 val_156 +158 val_158 +160 val_160 +162 val_162 +164 val_164 +164 val_164 +166 val_166 +168 val_168 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +176 val_176 +176 val_176 +178 val_178 +18 val_18 +18 val_18 +180 val_180 +186 val_186 +190 val_190 +192 val_192 +194 val_194 +196 val_196 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +202 val_202 +208 val_208 +208 val_208 +208 val_208 +214 val_214 +216 val_216 +216 val_216 +218 val_218 +222 val_222 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +238 val_238 +238 val_238 +24 val_24 +24 val_24 +242 val_242 +242 val_242 +244 val_244 +248 val_248 +252 val_252 +256 val_256 +256 val_256 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +266 val_266 +272 val_272 +272 val_272 +274 val_274 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +282 val_282 +282 val_282 +284 val_284 +286 val_286 +288 val_288 +288 val_288 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +306 val_306 +308 val_308 +310 val_310 +316 val_316 +316 val_316 +316 val_316 +318 val_318 +318 val_318 +318 val_318 +322 val_322 +322 val_322 +332 val_332 +336 val_336 +338 val_338 +34 val_34 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +366 val_366 +368 val_368 +374 val_374 +378 val_378 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +392 val_392 +394 val_394 +396 val_396 +396 val_396 +396 val_396 +4 val_4 +400 val_400 +402 val_402 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +414 val_414 +414 val_414 +418 val_418 +42 val_42 +42 val_42 +424 val_424 +424 val_424 +430 val_430 +430 val_430 +430 val_430 +432 val_432 +436 val_436 +438 val_438 +438 val_438 +438 val_438 +44 val_44 +444 val_444 +446 val_446 +448 val_448 +452 val_452 +454 val_454 +454 val_454 +454 val_454 +458 val_458 +458 val_458 +460 val_460 +462 val_462 +462 val_462 +466 val_466 +466 val_466 +466 val_466 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +470 val_470 +472 val_472 +478 val_478 +478 val_478 +480 val_480 +480 val_480 +480 val_480 +482 val_482 +484 val_484 +490 val_490 +492 val_492 +492 val_492 +494 val_494 +496 val_496 +498 val_498 +498 val_498 +498 val_498 +54 val_54 +58 val_58 +58 val_58 +64 val_64 +66 val_66 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +84 val_84 +84 val_84 +86 val_86 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +96 val_96 +98 
val_98 +98 val_98 diff --git a/ql/src/test/results/clientpositive/spark/bucket3.q.out b/ql/src/test/results/clientpositive/spark/bucket3.q.out new file mode 100644 index 0000000..241ed2b --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/bucket3.q.out @@ -0,0 +1,503 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE bucket3_1(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket3_1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE bucket3_1(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket3_1 +PREHOOK: query: explain extended +insert overwrite table bucket3_1 partition (ds='1') +select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucket3_1 partition (ds='1') +select * from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket3_1 + TOK_PARTSPEC + TOK_PARTVAL + ds + '1' + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked 
pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 2 + Static Partition Specification: ds=1/ + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket3_1 + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket3_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket3_1 + TotalFiles: 2 + GatherStats: true + MultiFileSpray: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket3_1 + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket3_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket3_1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucket3_1@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucket3_1 partition (ds='1') +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucket3_1@ds=1 +POSTHOOK: Lineage: bucket3_1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucket3_1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table bucket3_1 partition (ds='2') +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucket3_1@ds=2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot 
be connected to +POSTHOOK: query: insert overwrite table bucket3_1 partition (ds='2') +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucket3_1@ds=2 +POSTHOOK: Lineage: bucket3_1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucket3_1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select * from bucket3_1 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from bucket3_1 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: s + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from bucket3_1 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket3_1 +PREHOOK: Input: default@bucket3_1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket3_1 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket3_1 +POSTHOOK: Input: default@bucket3_1@ds=1 +#### A masked pattern was here #### +0 val_0 1 +0 val_0 1 +0 val_0 1 +10 val_10 1 +100 val_100 1 +100 val_100 1 +104 val_104 1 +104 val_104 1 +114 val_114 1 +116 val_116 1 +118 val_118 1 +118 val_118 1 +12 val_12 1 +12 val_12 1 +120 val_120 1 +120 val_120 1 +126 val_126 1 +128 val_128 1 +128 val_128 1 +128 val_128 1 +134 val_134 1 +134 val_134 1 +136 val_136 1 +138 val_138 1 +138 val_138 1 +138 val_138 1 +138 val_138 1 +146 val_146 1 +146 val_146 1 +150 val_150 1 +152 val_152 1 +152 val_152 1 +156 val_156 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 +164 val_164 1 +164 val_164 1 +166 val_166 1 +168 val_168 1 +170 val_170 1 +172 val_172 1 +172 val_172 1 +174 val_174 1 +174 val_174 1 +176 val_176 1 +176 val_176 1 +178 val_178 1 +18 val_18 1 +18 val_18 1 +180 val_180 1 +186 val_186 1 +190 val_190 1 +192 val_192 1 +194 val_194 1 +196 val_196 1 +2 val_2 1 +20 val_20 1 +200 val_200 1 +200 val_200 1 +202 val_202 1 +208 val_208 1 +208 val_208 1 +208 val_208 1 +214 val_214 1 +216 val_216 1 +216 val_216 1 +218 val_218 1 +222 val_222 1 +224 val_224 1 +224 val_224 1 +226 val_226 1 +228 val_228 1 +230 val_230 1 +230 val_230 1 +230 val_230 1 +230 val_230 1 +230 val_230 1 +238 val_238 1 +238 val_238 1 +24 val_24 1 +24 val_24 1 +242 val_242 1 +242 val_242 1 +244 val_244 1 +248 val_248 1 +252 val_252 1 +256 val_256 1 +256 val_256 1 +258 val_258 1 +26 val_26 1 +26 val_26 1 +260 val_260 1 +262 val_262 1 +266 val_266 1 +272 val_272 1 +272 val_272 1 +274 val_274 1 +278 val_278 1 +278 val_278 1 +28 val_28 1 +280 val_280 1 +280 val_280 1 +282 val_282 1 +282 val_282 1 +284 val_284 1 +286 val_286 1 +288 val_288 1 +288 val_288 1 +292 val_292 1 +296 val_296 1 +298 val_298 1 +298 val_298 1 +298 val_298 1 +30 val_30 1 +302 val_302 1 +306 val_306 1 +308 val_308 1 +310 val_310 1 +316 val_316 1 +316 val_316 1 
+316 val_316 1 +318 val_318 1 +318 val_318 1 +318 val_318 1 +322 val_322 1 +322 val_322 1 +332 val_332 1 +336 val_336 1 +338 val_338 1 +34 val_34 1 +342 val_342 1 +342 val_342 1 +344 val_344 1 +344 val_344 1 +348 val_348 1 +348 val_348 1 +348 val_348 1 +348 val_348 1 +348 val_348 1 +356 val_356 1 +360 val_360 1 +362 val_362 1 +364 val_364 1 +366 val_366 1 +368 val_368 1 +374 val_374 1 +378 val_378 1 +382 val_382 1 +382 val_382 1 +384 val_384 1 +384 val_384 1 +384 val_384 1 +386 val_386 1 +392 val_392 1 +394 val_394 1 +396 val_396 1 +396 val_396 1 +396 val_396 1 +4 val_4 1 +400 val_400 1 +402 val_402 1 +404 val_404 1 +404 val_404 1 +406 val_406 1 +406 val_406 1 +406 val_406 1 +406 val_406 1 +414 val_414 1 +414 val_414 1 +418 val_418 1 +42 val_42 1 +42 val_42 1 +424 val_424 1 +424 val_424 1 +430 val_430 1 +430 val_430 1 +430 val_430 1 +432 val_432 1 +436 val_436 1 +438 val_438 1 +438 val_438 1 +438 val_438 1 +44 val_44 1 +444 val_444 1 +446 val_446 1 +448 val_448 1 +452 val_452 1 +454 val_454 1 +454 val_454 1 +454 val_454 1 +458 val_458 1 +458 val_458 1 +460 val_460 1 +462 val_462 1 +462 val_462 1 +466 val_466 1 +466 val_466 1 +466 val_466 1 +468 val_468 1 +468 val_468 1 +468 val_468 1 +468 val_468 1 +470 val_470 1 +472 val_472 1 +478 val_478 1 +478 val_478 1 +480 val_480 1 +480 val_480 1 +480 val_480 1 +482 val_482 1 +484 val_484 1 +490 val_490 1 +492 val_492 1 +492 val_492 1 +494 val_494 1 +496 val_496 1 +498 val_498 1 +498 val_498 1 +498 val_498 1 +54 val_54 1 +58 val_58 1 +58 val_58 1 +64 val_64 1 +66 val_66 1 +70 val_70 1 +70 val_70 1 +70 val_70 1 +72 val_72 1 +72 val_72 1 +74 val_74 1 +76 val_76 1 +76 val_76 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 +82 val_82 1 +84 val_84 1 +84 val_84 1 +86 val_86 1 +90 val_90 1 +90 val_90 1 +90 val_90 1 +92 val_92 1 +96 val_96 1 +98 val_98 1 +98 val_98 1 diff --git a/ql/src/test/results/clientpositive/spark/bucket4.q.out b/ql/src/test/results/clientpositive/spark/bucket4.q.out new file mode 100644 index 0000000..cbc8abb --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/bucket4.q.out @@ -0,0 +1,476 @@ +PREHOOK: query: CREATE TABLE bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket4_1 +POSTHOOK: query: CREATE TABLE bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket4_1 +PREHOOK: query: explain extended +insert overwrite table bucket4_1 +select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucket4_1 +select * from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucket4_1 + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 2 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket4_1 + serialization.ddl struct bucket4_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket4_1 + TotalFiles: 2 + GatherStats: true + MultiFileSpray: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns 
key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket4_1 + serialization.ddl struct bucket4_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket4_1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucket4_1 +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucket4_1 +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucket4_1 +POSTHOOK: Lineage: bucket4_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucket4_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select * from bucket4_1 tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from bucket4_1 tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: s + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from bucket4_1 tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket4_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket4_1 tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket4_1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +8 val_8 +10 val_10 +12 val_12 +12 val_12 +18 val_18 +18 val_18 +20 val_20 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +28 val_28 +30 val_30 +34 val_34 +42 val_42 +42 val_42 +44 val_44 +54 val_54 +58 val_58 +58 val_58 +64 val_64 +66 val_66 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +78 val_78 +80 val_80 +82 val_82 +84 val_84 +84 val_84 +86 val_86 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +96 val_96 +98 val_98 +98 val_98 +100 val_100 +100 val_100 +104 val_104 +104 val_104 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +120 val_120 +120 val_120 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +134 val_134 +134 val_134 +136 val_136 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +146 val_146 +146 val_146 +150 val_150 +152 val_152 +152 val_152 +156 val_156 +158 val_158 +160 val_160 +162 val_162 +164 val_164 +164 val_164 +166 val_166 +168 val_168 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +176 val_176 +176 val_176 +178 val_178 +180 val_180 +186 val_186 +190 val_190 +192 val_192 +194 val_194 +196 val_196 +200 val_200 +200 val_200 +202 val_202 +208 val_208 +208 val_208 
+208 val_208 +214 val_214 +216 val_216 +216 val_216 +218 val_218 +222 val_222 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +238 val_238 +238 val_238 +242 val_242 +242 val_242 +244 val_244 +248 val_248 +252 val_252 +256 val_256 +256 val_256 +258 val_258 +260 val_260 +262 val_262 +266 val_266 +272 val_272 +272 val_272 +274 val_274 +278 val_278 +278 val_278 +280 val_280 +280 val_280 +282 val_282 +282 val_282 +284 val_284 +286 val_286 +288 val_288 +288 val_288 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +302 val_302 +306 val_306 +308 val_308 +310 val_310 +316 val_316 +316 val_316 +316 val_316 +318 val_318 +318 val_318 +318 val_318 +322 val_322 +322 val_322 +332 val_332 +336 val_336 +338 val_338 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +366 val_366 +368 val_368 +374 val_374 +378 val_378 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +392 val_392 +394 val_394 +396 val_396 +396 val_396 +396 val_396 +400 val_400 +402 val_402 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +414 val_414 +414 val_414 +418 val_418 +424 val_424 +424 val_424 +430 val_430 +430 val_430 +430 val_430 +432 val_432 +436 val_436 +438 val_438 +438 val_438 +438 val_438 +444 val_444 +446 val_446 +448 val_448 +452 val_452 +454 val_454 +454 val_454 +454 val_454 +458 val_458 +458 val_458 +460 val_460 +462 val_462 +462 val_462 +466 val_466 +466 val_466 +466 val_466 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +470 val_470 +472 val_472 +478 val_478 +478 val_478 +480 val_480 +480 val_480 +480 val_480 +482 val_482 +484 val_484 +490 val_490 +492 val_492 +492 val_492 +494 val_494 +496 val_496 +498 val_498 +498 val_498 +498 val_498 diff --git a/ql/src/test/results/clientpositive/spark/count.q.out b/ql/src/test/results/clientpositive/spark/count.q.out new file mode 100644 index 0000000..4031360 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/count.q.out @@ -0,0 +1,304 @@ +PREHOOK: query: create table abcd (a int, b int, c int, d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@abcd +POSTHOOK: query: create table abcd (a int, b int, c int, d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@abcd +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@abcd +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@abcd +PREHOOK: query: select * from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select * from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +NULL 35 23 6 +10 1000 50 1 +100 100 10 3 +12 NULL 80 2 +10 100 NULL 5 +10 100 45 4 +12 100 75 7 +PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + 
Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) + keys: a (type: int), b (type: int), c (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +NULL 1 1 6 +10 2 2 10 +12 1 2 9 +100 1 1 3 +PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1), count(), count(a), count(b), count(c), count(d), count(DISTINCT a), count(DISTINCT b), count(DISTINCT c), count(DISTINCT d), count(DISTINCT a, b), count(DISTINCT b, c), count(DISTINCT c, d), count(DISTINCT a, d), count(DISTINCT a, c), count(DISTINCT b, d), count(DISTINCT a, b, c), count(DISTINCT b, c, d), count(DISTINCT a, c, d), count(DISTINCT a, b, d), count(DISTINCT a, b, c, d) + keys: a (type: int), b (type: int), c (type: int), d (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: ++++ + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: bigint), _col17 (type: bigint), _col18 (type: bigint), _col19 (type: bigint), _col20 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int), c (type: int) + sort order: +++ + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: int) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: 
select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +NULL 1 1 6 +10 2 2 10 +12 1 2 9 +100 1 1 3 +PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int), c (type: int), d (type: int) + sort order: ++++ + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: 
bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: bigint), _col17 (type: bigint), _col18 (type: bigint), _col19 (type: bigint), _col20 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 diff --git a/ql/src/test/results/clientpositive/spark/create_merge_compressed.q.out b/ql/src/test/results/clientpositive/spark/create_merge_compressed.q.out new file mode 100644 index 0000000..7c27c6a --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/create_merge_compressed.q.out @@ -0,0 +1,140 @@ +PREHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_rc_merge_test +POSTHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_rc_merge_test +PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@src_rc_merge_test +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@src_rc_merge_test +PREHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tgt_rc_merge_test 
+PREHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@src_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@src_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: show table extended like `tgt_rc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `tgt_rc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +tableName:tgt_rc_merge_test +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:2 +totalFileSize:342 +maxFileSize:171 +minFileSize:171 +#### A masked pattern was here #### + +PREHOOK: query: select count(1) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +#### A masked pattern was here #### +10 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +#### A masked pattern was here #### +46 -751895388 +PREHOOK: query: alter table tgt_rc_merge_test concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: alter table tgt_rc_merge_test concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +PREHOOK: query: show table extended like `tgt_rc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `tgt_rc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS 
+tableName:tgt_rc_merge_test +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:243 +maxFileSize:243 +minFileSize:243 +#### A masked pattern was here #### + +PREHOOK: query: select count(1) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +#### A masked pattern was here #### +10 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +#### A masked pattern was here #### +46 -751895388 +PREHOOK: query: drop table src_rc_merge_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_rc_merge_test +PREHOOK: Output: default@src_rc_merge_test +POSTHOOK: query: drop table src_rc_merge_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_rc_merge_test +POSTHOOK: Output: default@src_rc_merge_test +PREHOOK: query: drop table tgt_rc_merge_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: drop table tgt_rc_merge_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test diff --git a/ql/src/test/results/clientpositive/spark/ctas.q.out b/ql/src/test/results/clientpositive/spark/ctas.q.out new file mode 100644 index 0000000..020b139 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/ctas.q.out @@ -0,0 +1,907 @@ +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +create table nzhang_Tmp(a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_Tmp +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +create table nzhang_Tmp(a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_Tmp +PREHOOK: query: select * from nzhang_Tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_tmp +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_Tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_tmp +#### A masked pattern was here #### +PREHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) + Reducer 3 <- Reducer 2 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value 
(type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_CTAS1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: k string, value string + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: nzhang_CTAS1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_CTAS1 +PREHOOK: query: select * from nzhang_CTAS1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas1 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_CTAS1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_ctas1 +POSTHOOK: query: describe formatted nzhang_CTAS1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_ctas1 +# col_name data_type comment + +k string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + 
COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas2 as select * from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas2 as select * from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) + Reducer 3 <- Reducer 2 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_ctas2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: nzhang_ctas2 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT 
+PREHOOK: Input: default@src +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas2 +PREHOOK: query: select * from nzhang_ctas2 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas2 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_ctas2 +POSTHOOK: query: describe formatted nzhang_CTAS2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_ctas2 +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) + Reducer 3 <- Reducer 2 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key / 2) (type: double), concat(value, '_con') (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.nzhang_ctas3 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: half_key double, conb string + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde name: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: nzhang_ctas3 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas3 +PREHOOK: query: select * from nzhang_ctas3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +0.0 val_0_con +0.0 val_0_con +0.0 val_0_con +1.0 val_2_con +2.0 val_4_con +2.5 val_5_con +2.5 val_5_con +2.5 val_5_con +4.0 val_8_con +4.5 val_9_con +PREHOOK: query: describe formatted nzhang_CTAS3 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_ctas3 +POSTHOOK: query: describe formatted nzhang_CTAS3 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_ctas3 +# col_name data_type comment + +half_key double +conb string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num 
Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +POSTHOOK: type: CREATETABLE +STAGE DEPENDENCIES: + +STAGE PLANS: +PREHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +POSTHOOK: type: CREATETABLE +PREHOOK: query: select * from nzhang_ctas3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +0.0 val_0_con +0.0 val_0_con +0.0 val_0_con +1.0 val_2_con +2.0 val_4_con +2.5 val_5_con +2.5 val_5_con +2.5 val_5_con +4.0 val_8_con +4.5 val_9_con +PREHOOK: query: describe formatted nzhang_CTAS3 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_ctas3 +POSTHOOK: query: describe formatted nzhang_CTAS3 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_ctas3 +# col_name data_type comment + +half_key double +conb string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) + Reducer 3 <- Reducer 2 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_ctas4 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + field delimiter: , + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: nzhang_ctas4 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas4 +PREHOOK: query: select * from nzhang_ctas4 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas4 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas4 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS4 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_ctas4 +POSTHOOK: query: describe formatted nzhang_CTAS4 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_ctas4 +# col_name data_type comment + +key string +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	field.delim         	,                   
+	serialization.format	,                   
+PREHOOK: query: explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+ABSTRACT SYNTAX TREE:
+  
+TOK_CREATETABLE
+   TOK_TABNAME
+      nzhang_ctas5
+   TOK_LIKETABLE
+   TOK_TABLEROWFORMAT
+      TOK_SERDEPROPS
+         TOK_TABLEROWFORMATFIELD
+            ','
+         TOK_TABLEROWFORMATLINES
+            '\012'
+   TOK_FILEFORMAT_GENERIC
+      textfile
+   TOK_QUERY
+      TOK_FROM
+         TOK_TABREF
+            TOK_TABNAME
+               src
+      TOK_INSERT
+         TOK_DESTINATION
+            TOK_DIR
+               TOK_TMP_FILE
+         TOK_SELECT
+            TOK_SELEXPR
+               TOK_TABLE_OR_COL
+                  key
+            TOK_SELEXPR
+               TOK_TABLE_OR_COL
+                  value
+         TOK_SORTBY
+            TOK_TABSORTCOLNAMEASC
+               TOK_TABLE_OR_COL
+                  key
+            TOK_TABSORTCOLNAMEASC
+               TOK_TABLE_OR_COL
+                  value
+         TOK_LIMIT
+            10
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT)
+        Reducer 3 <- Reducer 2 (GROUP SORT)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                      tag: -1
+                      auto parallelism: true
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: src
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE true
+                    bucket_count -1
+                    columns key,value
+                    columns.comments defaultdefault
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.src
+                    numFiles 1
+                    numRows 0
+                    rawDataSize 0
+                    serialization.ddl struct src { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE true
+                      bucket_count -1
+                      columns key,value
+                      columns.comments defaultdefault
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.src
+                      numFiles 1
+                      numRows 0
+                      rawDataSize 0
+                      serialization.ddl struct src { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 5812
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.src
+                  name: default.src
+            Truncated Path -> Alias:
+              /src [src]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    auto parallelism: false
+        Reducer 3 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns key,value
+                          columns.types string:string
+                          field.delim ,
+                          line.delim 
+
+                          name default.nzhang_ctas5
+                          serialization.format ,
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.nzhang_ctas5
+                    TotalFiles: 1
+                    GatherStats: true
+                    MultiFileSpray: false
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+      Create Table Operator:
+        Create Table
+          columns: key string, value string
+          field delimiter: ,
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          line delimiter: 
+
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: nzhang_ctas5
+
+  Stage: Stage-3
+      Stats-Aggr Operator
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@nzhang_ctas5
+PREHOOK: query: create table nzhang_ctas6 (key string, `to` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nzhang_ctas6
+POSTHOOK: query: create table nzhang_ctas6 (key string, `to` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nzhang_ctas6
+PREHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src tablesample (10 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@nzhang_ctas6
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src tablesample (10 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@nzhang_ctas6
+POSTHOOK: Lineage: nzhang_ctas6.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: nzhang_ctas6.to SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table nzhang_ctas7 as select key, `to` from nzhang_ctas6
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@nzhang_ctas6
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: create table nzhang_ctas7 as select key, `to` from nzhang_ctas6
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@nzhang_ctas6
+POSTHOOK: Output: default@nzhang_ctas7
diff --git a/ql/src/test/results/clientpositive/spark/custom_input_output_format.q.out b/ql/src/test/results/clientpositive/spark/custom_input_output_format.q.out
new file mode 100644
index 0000000..d7b763a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/custom_input_output_format.q.out
@@ -0,0 +1,103 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE src1_rot13_iof(key STRING, value STRING)
+  STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13InputFormat'
+            OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13OutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src1_rot13_iof
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE src1_rot13_iof(key STRING, value STRING)
+  STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13InputFormat'
+            OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.udf.Rot13OutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src1_rot13_iof
+PREHOOK: query: DESCRIBE EXTENDED src1_rot13_iof
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src1_rot13_iof
+POSTHOOK: query: DESCRIBE EXTENDED src1_rot13_iof
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src1_rot13_iof
+key                 	string              	                    
+value               	string              	                    
+	 	 
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+	
+	
+	
+	
+	val_165
+	val_193
+	val_265
+	val_27
+	val_409
+	val_484
+128	
+146	val_146
+150	val_150
+213	val_213
+224	
+238	val_238
+255	val_255
+273	val_273
+278	val_278
+311	val_311
+369	
+401	val_401
+406	val_406
+66	val_66
+98	val_98
+PREHOOK: query: INSERT OVERWRITE TABLE src1_rot13_iof SELECT * FROM src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@src1_rot13_iof
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: INSERT OVERWRITE TABLE src1_rot13_iof SELECT * FROM src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@src1_rot13_iof
+POSTHOOK: Lineage: src1_rot13_iof.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src1_rot13_iof.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT * FROM src1_rot13_iof
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1_rot13_iof
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src1_rot13_iof
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1_rot13_iof
+#### A masked pattern was here ####
+	
+	
+	
+	
+	val_165
+	val_193
+	val_265
+	val_27
+	val_409
+	val_484
+128	
+146	val_146
+150	val_150
+213	val_213
+224	
+238	val_238
+255	val_255
+273	val_273
+278	val_278
+311	val_311
+369	
+401	val_401
+406	val_406
+66	val_66
+98	val_98
diff --git a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out
new file mode 100644
index 0000000..f153fcb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out
@@ -0,0 +1,501 @@
+PREHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket2_1
+POSTHOOK: query: CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket2_1
+PREHOOK: query: explain extended
+insert overwrite table bucket2_1
+select * from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+insert overwrite table bucket2_1
+select * from src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            src
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_TAB
+            TOK_TABNAME
+               bucket2_1
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+                      Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                      tag: -1
+                      value expressions: _col0 (type: string), _col1 (type: string)
+                      auto parallelism: false
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: src
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE true
+                    bucket_count -1
+                    columns key,value
+                    columns.comments defaultdefault
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.src
+                    numFiles 1
+                    numRows 0
+                    rawDataSize 0
+                    serialization.ddl struct src { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE true
+                      bucket_count -1
+                      columns key,value
+                      columns.comments defaultdefault
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.src
+                      numFiles 1
+                      numRows 0
+                      rawDataSize 0
+                      serialization.ddl struct src { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 5812
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.src
+                  name: default.src
+            Truncated Path -> Alias:
+              /src [src]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Extract
+                Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 2
+                    Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          bucket_count 2
+                          bucket_field_name key
+                          columns key,value
+                          columns.comments 
+                          columns.types int:string
+#### A masked pattern was here ####
+                          name default.bucket2_1
+                          serialization.ddl struct bucket2_1 { i32 key, string value}
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.bucket2_1
+                    TotalFiles: 2
+                    GatherStats: true
+                    MultiFileSpray: true
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+#### A masked pattern was here ####
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 2
+                bucket_field_name key
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.bucket2_1
+                serialization.ddl struct bucket2_1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket2_1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: insert overwrite table bucket2_1
+select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@bucket2_1
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: insert overwrite table bucket2_1
+select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@bucket2_1
+POSTHOOK: Lineage: bucket2_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: bucket2_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
+                    Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket2_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from bucket2_1 tablesample (bucket 1 out of 2) s order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket2_1
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+2	val_2
+4	val_4
+8	val_8
+10	val_10
+12	val_12
+12	val_12
+18	val_18
+18	val_18
+20	val_20
+24	val_24
+24	val_24
+26	val_26
+26	val_26
+28	val_28
+30	val_30
+34	val_34
+42	val_42
+42	val_42
+44	val_44
+54	val_54
+58	val_58
+58	val_58
+64	val_64
+66	val_66
+70	val_70
+70	val_70
+70	val_70
+72	val_72
+72	val_72
+74	val_74
+76	val_76
+76	val_76
+78	val_78
+80	val_80
+82	val_82
+84	val_84
+84	val_84
+86	val_86
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+96	val_96
+98	val_98
+98	val_98
+100	val_100
+100	val_100
+104	val_104
+104	val_104
+114	val_114
+116	val_116
+118	val_118
+118	val_118
+120	val_120
+120	val_120
+126	val_126
+128	val_128
+128	val_128
+128	val_128
+134	val_134
+134	val_134
+136	val_136
+138	val_138
+138	val_138
+138	val_138
+138	val_138
+146	val_146
+146	val_146
+150	val_150
+152	val_152
+152	val_152
+156	val_156
+158	val_158
+160	val_160
+162	val_162
+164	val_164
+164	val_164
+166	val_166
+168	val_168
+170	val_170
+172	val_172
+172	val_172
+174	val_174
+174	val_174
+176	val_176
+176	val_176
+178	val_178
+180	val_180
+186	val_186
+190	val_190
+192	val_192
+194	val_194
+196	val_196
+200	val_200
+200	val_200
+202	val_202
+208	val_208
+208	val_208
+208	val_208
+214	val_214
+216	val_216
+216	val_216
+218	val_218
+222	val_222
+224	val_224
+224	val_224
+226	val_226
+228	val_228
+230	val_230
+230	val_230
+230	val_230
+230	val_230
+230	val_230
+238	val_238
+238	val_238
+242	val_242
+242	val_242
+244	val_244
+248	val_248
+252	val_252
+256	val_256
+256	val_256
+258	val_258
+260	val_260
+262	val_262
+266	val_266
+272	val_272
+272	val_272
+274	val_274
+278	val_278
+278	val_278
+280	val_280
+280	val_280
+282	val_282
+282	val_282
+284	val_284
+286	val_286
+288	val_288
+288	val_288
+292	val_292
+296	val_296
+298	val_298
+298	val_298
+298	val_298
+302	val_302
+306	val_306
+308	val_308
+310	val_310
+316	val_316
+316	val_316
+316	val_316
+318	val_318
+318	val_318
+318	val_318
+322	val_322
+322	val_322
+332	val_332
+336	val_336
+338	val_338
+342	val_342
+342	val_342
+344	val_344
+344	val_344
+348	val_348
+348	val_348
+348	val_348
+348	val_348
+348	val_348
+356	val_356
+360	val_360
+362	val_362
+364	val_364
+366	val_366
+368	val_368
+374	val_374
+378	val_378
+382	val_382
+382	val_382
+384	val_384
+384	val_384
+384	val_384
+386	val_386
+392	val_392
+394	val_394
+396	val_396
+396	val_396
+396	val_396
+400	val_400
+402	val_402
+404	val_404
+404	val_404
+406	val_406
+406	val_406
+406	val_406
+406	val_406
+414	val_414
+414	val_414
+418	val_418
+424	val_424
+424	val_424
+430	val_430
+430	val_430
+430	val_430
+432	val_432
+436	val_436
+438	val_438
+438	val_438
+438	val_438
+444	val_444
+446	val_446
+448	val_448
+452	val_452
+454	val_454
+454	val_454
+454	val_454
+458	val_458
+458	val_458
+460	val_460
+462	val_462
+462	val_462
+466	val_466
+466	val_466
+466	val_466
+468	val_468
+468	val_468
+468	val_468
+468	val_468
+470	val_470
+472	val_472
+478	val_478
+478	val_478
+480	val_480
+480	val_480
+480	val_480
+482	val_482
+484	val_484
+490	val_490
+492	val_492
+492	val_492
+494	val_494
+496	val_496
+498	val_498
+498	val_498
+498	val_498