diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out b/ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out new file mode 100644 index 0000000..cfaadd8 --- /dev/null +++ b/ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out @@ -0,0 +1,391 @@ +PREHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster +-- JAVA_VERSION_SPECIFIC_OUTPUT + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + list_bucketing_static_part + TOK_PARTSPEC + TOK_PARTVAL + ds + '2008-04-08' + TOK_PARTVAL + hr + '11' + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 5520 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [51], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51} +Storage Desc Params: + serialization.format 1