diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 89243fc..ab6bed9 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -474,6 +474,7 @@ spark.query.files=add_part_multiple.q \
   sample7.q \
   sample8.q \
   sample9.q \
+  sample10.q \
   script_env_var1.q \
   script_env_var2.q \
   script_pipe.q \
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java
index 12a43c0..65cd953 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java
@@ -24,36 +24,27 @@ import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
 import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.spark.TaskContext;
-import org.apache.spark.api.java.function.PairFlatMapFunction;
 
 import scala.Tuple2;
 
-public class HiveMapFunction implements PairFlatMapFunction<Iterator<Tuple2<BytesWritable, BytesWritable>>,
-    HiveKey, BytesWritable> {
-  private static final long serialVersionUID = 1L;
-
-  private transient JobConf jobConf;
+public class HiveMapFunction extends HivePairFlatMapFunction<
+    Iterator<Tuple2<BytesWritable, BytesWritable>>,
+    HiveKey,
+    BytesWritable> {
 
-  private byte[] buffer;
+  private static final long serialVersionUID = 1L;
 
   public HiveMapFunction(byte[] buffer) {
-    this.buffer = buffer;
+    super(buffer);
   }
 
   @Override
   public Iterable<Tuple2<HiveKey, BytesWritable>> call(Iterator<Tuple2<BytesWritable, BytesWritable>> it)
       throws Exception {
-    if (jobConf == null) {
-      jobConf = KryoSerializer.deserializeJobConf(this.buffer);
-      // set mapred.task.partition in executor side.
-      jobConf.setInt("mapred.task.partition", TaskContext.get().getPartitionId());
-    }
+    initJobConf();
 
     SparkRecordHandler mapRecordHandler;
 
-    // need different record handler for MergeFileWork
     if (MergeFileMapper.class.getName().equals(jobConf.get(Utilities.MAPRED_MAPPER_CLASS))) {
       mapRecordHandler = new SparkMergeFileRecordHandler();
@@ -68,4 +59,9 @@ public HiveMapFunction(byte[] buffer) {
     return result;
   }
 
+  @Override
+  protected boolean isMap() {
+    return true;
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HivePairFlatMapFunction.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HivePairFlatMapFunction.java
new file mode 100644
index 0000000..5a45483
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HivePairFlatMapFunction.java
@@ -0,0 +1,63 @@
+package org.apache.hadoop.hive.ql.exec.spark;
+
+import java.text.NumberFormat;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.spark.TaskContext;
+import org.apache.spark.api.java.function.PairFlatMapFunction;
+
+
+public abstract class HivePairFlatMapFunction<T, K, V> implements PairFlatMapFunction<T, K, V> {
+
+  protected transient JobConf jobConf;
+
+  private byte[] buffer;
+
+  protected static final NumberFormat taskIdFormat = NumberFormat.getInstance();
+  protected static final NumberFormat stageIdFormat = NumberFormat.getInstance();
+
+  static {
+    taskIdFormat.setGroupingUsed(false);
+    taskIdFormat.setMinimumIntegerDigits(6);
+    stageIdFormat.setGroupingUsed(false);
+    stageIdFormat.setMinimumIntegerDigits(4);
+  }
+
+  public HivePairFlatMapFunction(byte[] buffer) {
+    this.buffer = buffer;
+  }
+
+  protected void initJobConf() {
+    if (jobConf == null) {
+      jobConf = KryoSerializer.deserializeJobConf(this.buffer);
+      setupMRLegacyConfigs();
+    }
+  }
+
+  protected abstract boolean isMap();
+
+  // Some Hive features depend on legacy MR configuration settings; build those
+  // settings and add them to the JobConf here.
+  private void setupMRLegacyConfigs() {
+    StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_");
+    taskAttemptIdBuilder.append(System.currentTimeMillis())
+      .append("_")
+      .append(stageIdFormat.format(TaskContext.get().getStageId()))
+      .append("_");
+
+    if (isMap()) {
+      taskAttemptIdBuilder.append("m_");
+    } else {
+      taskAttemptIdBuilder.append("r_");
+    }
+
+    taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().getPartitionId()))
+      .append("_")
+      .append(TaskContext.get().getAttemptId());
+
+    String taskAttemptIdStr = taskAttemptIdBuilder.toString();
+    jobConf.set("mapred.task.id", taskAttemptIdStr);
+    jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr);
+    jobConf.setInt("mapred.task.partition", TaskContext.get().getPartitionId());
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
index 8d34c82..91533be 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
@@ -20,37 +20,27 @@ import java.util.Iterator;
 
-import org.apache.hadoop.hive.ql.exec.mr.ExecReducer;
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.spark.TaskContext;
-import org.apache.spark.api.java.function.PairFlatMapFunction;
 
 import scala.Tuple2;
 
-public class HiveReduceFunction implements PairFlatMapFunction<
-    Iterator<Tuple2<HiveKey, Iterable<BytesWritable>>>, HiveKey, BytesWritable> {
-  private static final long serialVersionUID = 1L;
-
-  private transient JobConf jobConf;
+public class HiveReduceFunction extends HivePairFlatMapFunction<
+    Iterator<Tuple2<HiveKey, Iterable<BytesWritable>>>,
+    HiveKey,
+    BytesWritable> {
 
-  private byte[] buffer;
+  private static final long serialVersionUID = 1L;
 
   public HiveReduceFunction(byte[] buffer) {
-    this.buffer = buffer;
+    super(buffer);
  }
 
   @Override
   public Iterable<Tuple2<HiveKey, BytesWritable>> call(Iterator<Tuple2<HiveKey, Iterable<BytesWritable>>> it)
      throws Exception {
-    if (jobConf == null) {
-      jobConf = KryoSerializer.deserializeJobConf(this.buffer);
-      // set mapred.task.partition in executor side.
-      jobConf.setInt("mapred.task.partition", TaskContext.get().getPartitionId());
-    }
-
+    initJobConf();
     SparkReduceRecordHandler reducerRecordhandler = new SparkReduceRecordHandler();
     HiveReduceFunctionResultList result =
         new HiveReduceFunctionResultList(jobConf, it, reducerRecordhandler);
@@ -58,4 +48,9 @@ public HiveReduceFunction(byte[] buffer) {
 
     return result;
   }
+
+  @Override
+  protected boolean isMap() {
+    return false;
+  }
 }
diff --git ql/src/test/results/clientpositive/spark/sample10.q.out ql/src/test/results/clientpositive/spark/sample10.q.out
new file mode 100644
index 0000000..71bc523
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/sample10.q.out
@@ -0,0 +1,471 @@
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpartbucket
+POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpartbucket
+PREHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpartbucket
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpartbucket@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@srcpartbucket@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpartbucket@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@srcpartbucket@ds=2008-04-09/hr=12
+POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain extended
+select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            srcpartbucket
+         TOK_TABLEBUCKETSAMPLE
+            1
+            4
+            TOK_TABLE_OR_COL
+               key
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+         TOK_SELEXPR
+            TOK_FUNCTION
+               count
+               1
+      TOK_WHERE
+         TOK_FUNCTION
+            TOK_ISNOTNULL
+               TOK_TABLE_OR_COL
+                  ds
+      TOK_GROUPBY
+         TOK_TABLE_OR_COL
+            ds
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP)
+        Reducer 3 <- Reducer 2 (GROUP SORT)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: srcpartbucket
+                  Statistics: Num rows: 12 Data size: 1404 Basic stats: COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: true
+                    predicate: (((hash(key) & 2147483647) % 4) = 0) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 702 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ds (type: string)
+                      outputColumnNames: ds
+                      Statistics: Num rows: 6 Data size: 702 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        keys: ds (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 6 Data size: 702 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 6 Data size: 702 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col1 (type: bigint)
+                          auto parallelism: true
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: 000000_26
+                  input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE true
+                    bucket_count 4
+                    bucket_field_name key
+                    columns key,value
+                    columns.comments
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpartbucket
+                    numFiles 4
+                    numRows -1
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize -1
+                    serialization.ddl struct srcpartbucket { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    totalSize 351
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+
+                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                    properties:
+                      bucket_count 4
+                      bucket_field_name key
+                      columns key,value
+                      columns.comments
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpartbucket
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpartbucket { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    name: default.srcpartbucket
+                  name: default.srcpartbucket
+#### A masked pattern was here ####
+                Partition
+                  base file name: 000000_26
+                  input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 12
+                  properties:
+                    COLUMN_STATS_ACCURATE true
+                    bucket_count 4
+                    bucket_field_name key
+                    columns key,value
+                    columns.comments
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpartbucket
+                    numFiles 4
+                    numRows -1
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize -1
+                    serialization.ddl struct srcpartbucket { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    totalSize 351
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+
+                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                    properties:
+                      bucket_count 4
+                      bucket_field_name key
+                      columns key,value
+                      columns.comments
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpartbucket
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpartbucket { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    name: default.srcpartbucket
+                  name: default.srcpartbucket
+#### A masked pattern was here ####
+                Partition
+                  base file name: 000000_26
+                  input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                  partition values:
+                    ds 2008-04-09
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE true
+                    bucket_count 4
+                    bucket_field_name key
+                    columns key,value
+                    columns.comments
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpartbucket
+                    numFiles 4
+                    numRows -1
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize -1
+                    serialization.ddl struct srcpartbucket { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    totalSize 351
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+
+                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                    properties:
+                      bucket_count 4
+                      bucket_field_name key
+                      columns key,value
+                      columns.comments
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpartbucket
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpartbucket { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    name: default.srcpartbucket
+                  name: default.srcpartbucket
+#### A masked pattern was here ####
+                Partition
+                  base file name: 000000_26
+                  input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                  partition values:
+                    ds 2008-04-09
+                    hr 12
+                  properties:
+                    COLUMN_STATS_ACCURATE true
+                    bucket_count 4
+                    bucket_field_name key
+                    columns key,value
+                    columns.comments
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpartbucket
+                    numFiles 4
+                    numRows -1
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize -1
+                    serialization.ddl struct srcpartbucket { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    totalSize 351
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+
+                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                    properties:
+                      bucket_count 4
+                      bucket_field_name key
+                      columns key,value
+                      columns.comments
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpartbucket
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpartbucket { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                    name: default.srcpartbucket
+                  name: default.srcpartbucket
+            Truncated Path -> Alias:
+              /srcpartbucket/ds=2008-04-08/hr=11/000000_26 [srcpartbucket]
+              /srcpartbucket/ds=2008-04-08/hr=12/000000_26 [srcpartbucket]
+              /srcpartbucket/ds=2008-04-09/hr=11/000000_26 [srcpartbucket]
+              /srcpartbucket/ds=2008-04-09/hr=12/000000_26 [srcpartbucket]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 351 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3 Data size: 351 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Statistics: Num rows: 3 Data size: 351 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    value expressions: _col1 (type: bigint)
+                    auto parallelism: false
+        Reducer 3
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 351 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 3 Data size: 351 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types string:bigint
+                        escape.delim \
+                        hive.serialization.extend.nesting.levels true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpartbucket
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpartbucket
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+2008-04-08	10
+2008-04-09	10
+PREHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds ORDER BY ds ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpartbucket
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds ORDER BY ds ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpartbucket
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+2008-04-08	12
+2008-04-09	12
+PREHOOK: query: select * from srcpartbucket where ds is not null ORDER BY key ASC, value ASC, ds ASC, hr ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpartbucket
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select * from srcpartbucket where ds is not null ORDER BY key ASC, value ASC, ds ASC, hr ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpartbucket
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+0	val_0	2008-04-08	11
+0	val_0	2008-04-08	11
+0	val_0	2008-04-08	11
+0	val_0	2008-04-08	12
+0	val_0	2008-04-08	12
+0	val_0	2008-04-08	12
+0	val_0	2008-04-09	11
+0	val_0	2008-04-09	11
+0	val_0	2008-04-09	11
+0	val_0	2008-04-09	12
+0	val_0	2008-04-09	12
+0	val_0	2008-04-09	12
+2	val_2	2008-04-08	11
+2	val_2	2008-04-08	12
+2	val_2	2008-04-09	11
+2	val_2	2008-04-09	12
+4	val_4	2008-04-08	11
+4	val_4	2008-04-08	12
+4	val_4	2008-04-09	11
+4	val_4	2008-04-09	12
+5	val_5	2008-04-08	11
+5	val_5	2008-04-08	11
+5	val_5	2008-04-08	11
+5	val_5	2008-04-08	12
+5	val_5	2008-04-08	12
+5	val_5	2008-04-08	12
+5	val_5	2008-04-09	11
+5	val_5	2008-04-09	11
+5	val_5	2008-04-09	11
+5	val_5	2008-04-09	12
+5	val_5	2008-04-09	12
+5	val_5	2008-04-09	12
+8	val_8	2008-04-08	11
+8	val_8	2008-04-08	12
+8	val_8	2008-04-09	11
+8	val_8	2008-04-09	12
+9	val_9	2008-04-08	11
+9	val_9	2008-04-08	12
+9	val_9	2008-04-09	11
+9	val_9	2008-04-09	12
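
For reference, the string that setupMRLegacyConfigs() writes into mapred.task.id and mapreduce.task.attempt.id follows the classic MR shape attempt_<timestamp>_<4-digit stage>_<m|r>_<6-digit partition>_<attempt>. The standalone sketch below only mirrors that formatting so the shape is easy to see; the class name TaskAttemptIdSketch and the hard-coded sample values are illustrative assumptions, while in the patch the stage, partition, and attempt numbers come from Spark's TaskContext at runtime.

// Illustrative sketch only -- not the patch's code. It reproduces the task attempt ID
// formatting of HivePairFlatMapFunction.setupMRLegacyConfigs() with sample inputs.
import java.text.NumberFormat;

public class TaskAttemptIdSketch {
  public static void main(String[] args) {
    NumberFormat taskIdFormat = NumberFormat.getInstance();
    NumberFormat stageIdFormat = NumberFormat.getInstance();
    taskIdFormat.setGroupingUsed(false);
    taskIdFormat.setMinimumIntegerDigits(6);   // partition id padded to 6 digits
    stageIdFormat.setGroupingUsed(false);
    stageIdFormat.setMinimumIntegerDigits(4);  // stage id padded to 4 digits

    long timestamp = 1400000000000L; // stands in for System.currentTimeMillis()
    int stageId = 3;                 // stands in for TaskContext.get().getStageId()
    int partitionId = 17;            // stands in for TaskContext.get().getPartitionId()
    long attemptId = 0L;             // stands in for TaskContext.get().getAttemptId()
    boolean isMap = true;            // true for HiveMapFunction, false for HiveReduceFunction

    String taskAttemptId = "attempt_" + timestamp + "_"
        + stageIdFormat.format(stageId) + "_"
        + (isMap ? "m_" : "r_")
        + taskIdFormat.format(partitionId) + "_"
        + attemptId;

    // Prints: attempt_1400000000000_0003_m_000017_0
    System.out.println(taskAttemptId);
  }
}

This appears to be what lets MR-legacy-dependent tests such as the newly enabled sample10.q pass on the Spark backend, since that test relies on configuration values that were previously only set by MapReduce tasks.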