diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 02d8e9008d..12dad9f358 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -628,6 +628,7 @@ minillaplocal.query.files=\ results_cache_transactional.q,\ results_cache_with_masking.q,\ sample10.q,\ + sample10_mm.q,\ schema_evol_orc_acid_part_llap_io.q,\ schema_evol_orc_acid_part.q,\ schema_evol_orc_acid_part_update_llap_io.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 9dbd869d57..136709c6dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -379,7 +379,18 @@ public Path getBucketPath(int bucketNum) { if (srcs == null) { return null; } - return srcs[bucketNum].getPath(); + + // Compute bucketid from srcs and return the 1st match. + for (FileStatus src : srcs) { + String bucketName = src.getPath().getName(); + String bucketIdStr = Utilities.getBucketFileNameFromPathSubString(bucketName); + int bucketId = Utilities.getBucketIdFromFile(bucketIdStr); + if (bucketId == bucketNum) { + // match, return + return src.getPath(); + } + } + return null; } @SuppressWarnings("nls") diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java index 8200e6a237..75bce638a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Stack; +import org.apache.hadoop.hive.metastore.TableType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; @@ -192,8 +193,8 @@ public static NodeProcessor getDefaultProc() { // check if input pruning is possible // TODO: this code is buggy - it relies on having one file per bucket; no MM support (by design). - boolean isMmTable = AcidUtils.isInsertOnlyTable(part.getTable().getParameters()); - if (sampleDescr.getInputPruning() && !isMmTable) { + boolean isManagedTable = part.getTable().getTableType() == TableType.MANAGED_TABLE; + if (sampleDescr.getInputPruning() && !isManagedTable) { LOG.trace("numerator = " + num); LOG.trace("denominator = " + den); LOG.trace("bucket count = " + bucketCount); @@ -220,7 +221,7 @@ public static NodeProcessor getDefaultProc() { } } else { // need to do full scan - fullScanMsg = isMmTable ? "MM table" : "Tablesample not on clustered columns"; + fullScanMsg = isManagedTable ? "Managed table" : "Tablesample not on clustered columns"; } LOG.warn(fullScanMsg + ", using full table scan"); Path[] ret = part.getPath(); diff --git a/ql/src/test/queries/clientpositive/sample10_mm.q b/ql/src/test/queries/clientpositive/sample10_mm.q new file mode 100644 index 0000000000..f653e67875 --- /dev/null +++ b/ql/src/test/queries/clientpositive/sample10_mm.q @@ -0,0 +1,34 @@ +--! qt:dataset:srcpart +set hive.mapred.mode=nonstrict; +set hive.exec.submitviachild=false; +set hive.exec.submit.local.task.via.child=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +set hive.exec.reducers.max=4; +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +set hive.default.fileformat=RCFILE; +set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables,org.apache.hadoop.hive.ql.hooks.UpdateInputAccessTimeHook$PreExec; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); + +insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10; + + +select * from srcpartbucket; +explain select key from srcpartbucket tablesample (bucket 2 out of 4 on key); +select key from srcpartbucket tablesample (bucket 1 out of 4 on key); +select key from srcpartbucket tablesample (bucket 2 out of 4 on key); +select key from srcpartbucket tablesample (bucket 3 out of 4 on key); +select key from srcpartbucket tablesample (bucket 4 out of 4 on key); + +explain + select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key; +select key from srcpartbucket tablesample (bucket 1 out of 4 on key) group by key; +select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key; +select key from srcpartbucket tablesample (bucket 3 out of 4 on key) group by key; +select key from srcpartbucket tablesample (bucket 4 out of 4 on key) group by key; + diff --git a/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out b/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out index e4b390c9cd..140da7bf35 100644 --- a/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out +++ b/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out @@ -215,7 +215,52 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@harbucket POSTHOOK: Input: default@harbucket@ds=1 #### A masked pattern was here #### +51 +51 +54 +69 +96 +133 +162 +163 +165 +165 +237 +237 +238 +238 +256 +256 260 +289 +311 +311 +311 +332 +344 +344 +362 +369 +369 +369 +393 +397 +397 +407 +411 +432 +435 +453 +454 +454 +454 +466 +466 +466 +484 +498 +498 +498 PREHOOK: query: ALTER TABLE tstsrcpart_n2 ARCHIVE PARTITION (ds='2008-04-08', hr='12') PREHOOK: type: ALTERTABLE_ARCHIVE PREHOOK: Input: default@tstsrcpart_n2 @@ -234,7 +279,52 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@harbucket POSTHOOK: Input: default@harbucket@ds=1 #### A masked pattern was here #### +51 +51 +54 +69 +96 +133 +162 +163 +165 +165 +237 +237 +238 +238 +256 +256 260 +289 +311 +311 +311 +332 +344 +344 +362 +369 +369 +369 +393 +397 +397 +407 +411 +432 +435 +453 +454 +454 +454 +466 +466 +466 +484 +498 +498 +498 PREHOOK: query: ALTER TABLE tstsrcpart_n2 UNARCHIVE PARTITION (ds='2008-04-08', hr='12') PREHOOK: type: ALTERTABLE_UNARCHIVE PREHOOK: Input: default@tstsrcpart_n2 @@ -253,7 +343,52 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@harbucket POSTHOOK: Input: default@harbucket@ds=1 #### A masked pattern was here #### +51 +51 +54 +69 +96 +133 +162 +163 +165 +165 +237 +237 +238 +238 +256 +256 260 +289 +311 +311 +311 +332 +344 +344 +362 +369 +369 +369 +393 +397 +397 +407 +411 +432 +435 +453 +454 +454 +454 +466 +466 +466 +484 +498 +498 +498 PREHOOK: query: CREATE TABLE old_name(key INT) PARTITIONED by (ds STRING) PREHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out index afc7e95d67..51932e9431 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out @@ -1931,7 +1931,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: ds=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1981,7 +1981,7 @@ STAGE PLANS: name: default.test_table1_n1 name: default.test_table1_n1 Truncated Path -> Alias: - /test_table1_n1/ds=1/000001_0 [test_table1_n1] + /test_table1_n1/ds=1 [test_table1_n1] Stage: Stage-0 Fetch Operator @@ -2042,7 +2042,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: ds=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -2091,7 +2091,7 @@ STAGE PLANS: name: default.test_table3_n1 name: default.test_table3_n1 Truncated Path -> Alias: - /test_table3_n1/ds=1/000001_0 [test_table3_n1] + /test_table3_n1/ds=1 [test_table3_n1] Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/sample10.q.out b/ql/src/test/results/clientpositive/llap/sample10.q.out index 1b95314980..cbceadca02 100644 --- a/ql/src/test/results/clientpositive/llap/sample10.q.out +++ b/ql/src/test/results/clientpositive/llap/sample10.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000002_0 + base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -140,7 +140,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000002_0 + base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -191,7 +191,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000002_0 + base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -242,7 +242,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000002_0 + base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -292,10 +292,10 @@ STAGE PLANS: name: default.srcpartbucket name: default.srcpartbucket Truncated Path -> Alias: - /srcpartbucket/ds=2008-04-08/hr=11/000002_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-08/hr=12/000002_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-09/hr=11/000002_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-09/hr=12/000002_0 [srcpartbucket] + /srcpartbucket/ds=2008-04-08/hr=11 [srcpartbucket] + /srcpartbucket/ds=2008-04-08/hr=12 [srcpartbucket] + /srcpartbucket/ds=2008-04-09/hr=11 [srcpartbucket] + /srcpartbucket/ds=2008-04-09/hr=12 [srcpartbucket] Reducer 2 Execution mode: vectorized, llap Needs Tagging: false @@ -367,6 +367,8 @@ POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 #### A masked pattern was here #### +2008-04-08 14 +2008-04-09 14 PREHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds ORDER BY ds ASC PREHOOK: type: QUERY PREHOOK: Input: default@srcpartbucket @@ -383,6 +385,8 @@ POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 #### A masked pattern was here #### +2008-04-08 4 +2008-04-09 4 PREHOOK: query: select * from srcpartbucket where ds is not null ORDER BY key ASC, value ASC, ds ASC, hr ASC PREHOOK: type: QUERY PREHOOK: Input: default@srcpartbucket diff --git a/ql/src/test/results/clientpositive/llap/sample10_mm.q.out b/ql/src/test/results/clientpositive/llap/sample10_mm.q.out new file mode 100644 index 0000000000..28d0cd052f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/sample10_mm.q.out @@ -0,0 +1,346 @@ +PREHOOK: query: create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpartbucket +POSTHOOK: query: create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpartbucket +PREHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpartbucket +POSTHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpartbucket@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from srcpartbucket +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpartbucket +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5 val_5 2008-04-08 11 +2 val_2 2008-04-08 11 +5 val_5 2008-04-08 11 +5 val_5 2008-04-08 11 +0 val_0 2008-04-08 11 +0 val_0 2008-04-08 11 +0 val_0 2008-04-08 11 +9 val_9 2008-04-08 11 +8 val_8 2008-04-08 11 +4 val_4 2008-04-08 11 +5 val_5 2008-04-08 12 +2 val_2 2008-04-08 12 +5 val_5 2008-04-08 12 +5 val_5 2008-04-08 12 +0 val_0 2008-04-08 12 +0 val_0 2008-04-08 12 +0 val_0 2008-04-08 12 +9 val_9 2008-04-08 12 +8 val_8 2008-04-08 12 +4 val_4 2008-04-08 12 +5 val_5 2008-04-09 11 +2 val_2 2008-04-09 11 +5 val_5 2008-04-09 11 +5 val_5 2008-04-09 11 +0 val_0 2008-04-09 11 +0 val_0 2008-04-09 11 +0 val_0 2008-04-09 11 +9 val_9 2008-04-09 11 +8 val_8 2008-04-09 11 +4 val_4 2008-04-09 11 +5 val_5 2008-04-09 12 +2 val_2 2008-04-09 12 +5 val_5 2008-04-09 12 +5 val_5 2008-04-09 12 +0 val_0 2008-04-09 12 +0 val_0 2008-04-09 12 +0 val_0 2008-04-09 12 +9 val_9 2008-04-09 12 +8 val_8 2008-04-09 12 +4 val_4 2008-04-09 12 +PREHOOK: query: explain select key from srcpartbucket tablesample (bucket 2 out of 4 on key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from srcpartbucket tablesample (bucket 2 out of 4 on key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpartbucket + Filter Operator + predicate: (((hash(key) & 2147483647) % 4) = 1) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key from srcpartbucket tablesample (bucket 1 out of 4 on key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 1 out of 4 on key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: select key from srcpartbucket tablesample (bucket 2 out of 4 on key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 2 out of 4 on key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5 +2 +5 +5 +0 +0 +0 +5 +2 +5 +5 +0 +0 +0 +5 +2 +5 +5 +0 +0 +0 +5 +2 +5 +5 +0 +0 +0 +PREHOOK: query: select key from srcpartbucket tablesample (bucket 3 out of 4 on key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 3 out of 4 on key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +9 +8 +9 +8 +9 +8 +9 +8 +PREHOOK: query: select key from srcpartbucket tablesample (bucket 4 out of 4 on key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 4 out of 4 on key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +4 +4 +4 +4 +PREHOOK: query: explain + select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain + select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpartbucket + Statistics: Num rows: 40 Data size: 3400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((hash(key) & 2147483647) % 4) = 1) (type: boolean) + Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from srcpartbucket tablesample (bucket 1 out of 4 on key) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 1 out of 4 on key) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5 +0 +2 +PREHOOK: query: select key from srcpartbucket tablesample (bucket 3 out of 4 on key) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 3 out of 4 on key) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +9 +8 +PREHOOK: query: select key from srcpartbucket tablesample (bucket 4 out of 4 on key) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 4 out of 4 on key) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +4 diff --git a/ql/src/test/results/clientpositive/masking_5.q.out b/ql/src/test/results/clientpositive/masking_5.q.out index 498fc117c7..420a680752 100644 --- a/ql/src/test/results/clientpositive/masking_5.q.out +++ b/ql/src/test/results/clientpositive/masking_5.q.out @@ -788,3 +788,127 @@ POSTHOOK: Input: default@masking_test_n6 484 val_484 98 val_98 86 val_86 +327 val_327 +437 val_437 +97 val_97 +435 val_435 +169 val_169 +19 val_19 +307 val_307 +255 val_255 +169 val_169 +307 val_307 +419 val_419 +399 val_399 +153 val_153 +287 val_287 +315 val_315 +97 val_97 +427 val_427 +369 val_369 +341 val_341 +77 val_77 +85 val_85 +169 val_169 +409 val_409 +369 val_369 +67 val_67 +137 val_137 +409 val_409 +407 val_407 +421 val_421 +133 val_133 +187 val_187 +233 val_233 +197 val_197 +221 val_221 +417 val_417 +353 val_353 +83 val_83 +249 val_249 +157 val_157 +317 val_317 +165 val_165 +325 val_325 +443 val_443 +169 val_169 +199 val_199 +417 val_417 +479 val_479 +43 val_43 +237 val_237 +491 val_491 +51 val_51 +119 val_119 +149 val_149 +163 val_163 +255 val_255 +351 val_351 +327 val_327 +291 val_291 +165 val_165 +397 val_397 +57 val_57 +187 val_187 +191 val_191 +199 val_199 +311 val_311 +201 val_201 +393 val_393 +197 val_197 +339 val_339 +119 val_119 +113 val_113 +17 val_17 +409 val_409 +105 val_105 +483 val_483 +463 val_463 +195 val_195 +325 val_325 +463 val_463 +229 val_229 +411 val_411 +327 val_327 +467 val_467 +365 val_365 +191 val_191 +397 val_397 +377 val_377 +353 val_353 +485 val_485 +239 val_239 +495 val_495 +113 val_113 +417 val_417 +67 val_67 +181 val_181 +69 val_69 +399 val_399 +83 val_83 +453 val_453 +137 val_137 +449 val_449 +149 val_149 +311 val_311 +41 val_41 +65 val_65 +119 val_119 +221 val_221 +289 val_289 +195 val_195 +199 val_199 +233 val_233 +229 val_229 +239 val_239 +27 val_27 +317 val_317 +51 val_51 +459 val_459 +497 val_497 +311 val_311 +177 val_177 +237 val_237 +187 val_187 +459 val_459 +369 val_369 diff --git a/ql/src/test/results/clientpositive/sample6.q.out b/ql/src/test/results/clientpositive/sample6.q.out index 7f853e55c5..a6a6f2cb90 100644 --- a/ql/src/test/results/clientpositive/sample6.q.out +++ b/ql/src/test/results/clientpositive/sample6.q.out @@ -95,7 +95,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -144,7 +144,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s] + /srcbucket [s] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -437,55 +437,125 @@ POSTHOOK: Input: default@dest1_n27 6 val_7 10 val_10 10 val_11 +17 val_17 +21 val_22 +21 val_22 +21 val_22 +21 val_22 +27 val_27 30 val_30 30 val_31 +31 val_32 40 val_41 40 val_41 +51 val_51 +51 val_51 +51 val_52 +57 val_57 58 val_58 58 val_58 58 val_59 58 val_59 64 val_64 +65 val_65 +65 val_66 +65 val_66 70 val_70 70 val_70 70 val_70 70 val_71 80 val_80 80 val_81 +83 val_83 +83 val_83 86 val_86 86 val_87 90 val_90 90 val_90 90 val_90 +91 val_92 98 val_98 98 val_98 +105 val_105 +105 val_106 +105 val_106 110 val_111 +113 val_113 +113 val_113 116 val_116 116 val_117 +117 val_118 +117 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_120 +119 val_120 +119 val_120 +121 val_122 +121 val_122 +123 val_124 +123 val_124 126 val_126 126 val_127 126 val_127 134 val_134 134 val_134 134 val_135 +137 val_137 +137 val_137 +137 val_138 +153 val_153 +153 val_154 +153 val_154 156 val_156 156 val_157 156 val_157 +157 val_157 +157 val_158 +157 val_158 158 val_158 +163 val_163 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +177 val_177 +177 val_178 +177 val_178 178 val_178 178 val_179 178 val_179 184 val_185 +187 val_187 +187 val_187 +187 val_187 +195 val_195 +195 val_195 +197 val_197 +197 val_197 +197 val_198 206 val_207 206 val_207 206 val_207 208 val_208 208 val_208 208 val_208 +221 val_221 +221 val_221 +229 val_229 +229 val_229 +237 val_237 +237 val_237 +243 val_244 +243 val_244 244 val_244 244 val_245 244 val_245 244 val_245 +249 val_249 +249 val_250 +249 val_250 252 val_252 252 val_253 254 val_255 @@ -493,27 +563,40 @@ POSTHOOK: Input: default@dest1_n27 256 val_256 256 val_257 266 val_266 +271 val_272 272 val_272 272 val_272 272 val_273 286 val_286 286 val_287 +289 val_289 +289 val_290 292 val_292 292 val_293 292 val_293 304 val_305 +307 val_307 +307 val_307 308 val_308 308 val_309 308 val_309 +315 val_315 316 val_316 316 val_316 316 val_316 +317 val_317 +317 val_317 +317 val_318 326 val_327 +327 val_327 +327 val_327 +327 val_327 334 val_335 336 val_336 336 val_337 338 val_338 338 val_339 +339 val_339 342 val_342 342 val_342 342 val_343 @@ -526,21 +609,45 @@ POSTHOOK: Input: default@dest1_n27 348 val_348 348 val_348 348 val_349 +349 val_350 +349 val_350 +349 val_350 +349 val_350 352 val_353 352 val_353 +353 val_353 +353 val_353 +353 val_354 +355 val_356 +355 val_356 360 val_360 360 val_361 362 val_362 364 val_364 364 val_365 +369 val_369 +369 val_369 +369 val_369 +369 val_370 +371 val_372 +371 val_372 +371 val_372 +371 val_372 +377 val_377 378 val_378 378 val_379 +391 val_392 +391 val_392 392 val_392 392 val_393 392 val_393 396 val_396 396 val_396 396 val_396 +399 val_399 +399 val_399 +399 val_400 +399 val_400 402 val_402 402 val_403 402 val_403 @@ -550,15 +657,31 @@ POSTHOOK: Input: default@dest1_n27 404 val_405 404 val_405 404 val_405 +407 val_407 +407 val_408 +407 val_408 +407 val_408 408 val_409 408 val_409 410 val_411 +417 val_417 +417 val_417 +417 val_417 +419 val_419 +423 val_424 426 val_427 +427 val_427 +427 val_428 +427 val_428 440 val_441 440 val_441 +449 val_449 452 val_452 458 val_458 458 val_458 +463 val_463 +463 val_463 +463 val_464 466 val_466 466 val_466 466 val_466 @@ -569,10 +692,14 @@ POSTHOOK: Input: default@dest1_n27 478 val_478 478 val_479 478 val_479 +479 val_479 482 val_482 482 val_483 484 val_484 484 val_485 +497 val_497 +497 val_498 +497 val_498 PREHOOK: query: EXPLAIN EXTENDED SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 4 OUT OF 4 on key) s ORDER BY key, value PREHOOK: type: QUERY @@ -613,7 +740,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -662,7 +789,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000001_0 [s] + /srcbucket [s] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -712,11 +839,20 @@ POSTHOOK: Input: default@srcbucket 5 val_5 5 val_5 5 val_6 +12 val_12 +12 val_12 +12 val_13 15 val_15 15 val_15 15 val_16 15 val_16 +16 val_17 +16 val_17 +22 val_23 23 val_24 +24 val_24 +24 val_24 +28 val_28 33 val_33 33 val_34 35 val_35 @@ -725,20 +861,47 @@ POSTHOOK: Input: default@srcbucket 35 val_36 35 val_36 35 val_36 +42 val_42 +42 val_42 +42 val_43 +42 val_43 +42 val_43 +44 val_44 47 val_47 47 val_48 49 val_50 49 val_50 +50 val_51 +52 val_53 +52 val_53 +52 val_53 +52 val_53 53 val_53 53 val_54 +56 val_57 63 val_64 75 val_76 +76 val_76 +76 val_76 +76 val_77 +76 val_77 +76 val_77 87 val_87 87 val_88 87 val_88 +94 val_95 95 val_95 95 val_95 +104 val_104 +104 val_104 +104 val_105 +104 val_105 +104 val_105 111 val_111 +114 val_114 +114 val_115 +114 val_115 +114 val_115 125 val_125 125 val_125 125 val_126 @@ -746,6 +909,12 @@ POSTHOOK: Input: default@srcbucket 129 val_129 129 val_130 129 val_130 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_139 +138 val_139 145 val_145 147 val_148 147 val_148 @@ -755,13 +924,28 @@ POSTHOOK: Input: default@srcbucket 161 val_162 161 val_162 161 val_162 +166 val_166 167 val_167 167 val_167 167 val_167 167 val_168 +168 val_168 +168 val_169 +170 val_170 +170 val_171 +172 val_172 +172 val_172 +172 val_173 +180 val_180 +192 val_192 +192 val_193 193 val_193 193 val_193 193 val_193 +196 val_196 +196 val_197 +196 val_197 +196 val_197 203 val_203 203 val_203 207 val_207 @@ -775,17 +959,25 @@ POSTHOOK: Input: default@srcbucket 213 val_214 219 val_219 219 val_219 +224 val_224 +224 val_224 +224 val_225 227 val_228 241 val_241 241 val_242 241 val_242 241 val_242 241 val_242 +242 val_242 +242 val_242 +242 val_243 245 val_246 245 val_246 257 val_257 257 val_258 257 val_258 +258 val_258 +258 val_259 259 val_260 259 val_260 263 val_263 @@ -793,10 +985,16 @@ POSTHOOK: Input: default@srcbucket 265 val_265 265 val_266 267 val_268 +268 val_269 273 val_273 273 val_273 273 val_273 273 val_274 +274 val_274 +274 val_275 +278 val_278 +278 val_278 +278 val_279 281 val_281 281 val_281 281 val_282 @@ -805,8 +1003,18 @@ POSTHOOK: Input: default@srcbucket 283 val_283 293 val_294 293 val_294 +300 val_301 +300 val_301 303 val_304 303 val_304 +318 val_318 +318 val_318 +318 val_318 +318 val_319 +322 val_322 +322 val_322 +322 val_323 +330 val_331 331 val_331 331 val_331 331 val_332 @@ -814,6 +1022,10 @@ POSTHOOK: Input: default@srcbucket 335 val_335 335 val_336 335 val_336 +356 val_356 +356 val_357 +356 val_357 +358 val_359 367 val_367 367 val_367 367 val_368 @@ -823,8 +1035,15 @@ POSTHOOK: Input: default@srcbucket 379 val_379 379 val_380 381 val_382 +382 val_382 +382 val_382 +382 val_383 +382 val_383 385 val_386 385 val_386 +390 val_391 +390 val_391 +390 val_391 395 val_395 395 val_395 395 val_396 @@ -839,7 +1058,16 @@ POSTHOOK: Input: default@srcbucket 403 val_403 403 val_403 405 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_407 +412 val_413 +412 val_413 415 val_416 +416 val_417 +418 val_418 429 val_429 429 val_429 429 val_430 @@ -848,8 +1076,21 @@ POSTHOOK: Input: default@srcbucket 431 val_431 431 val_431 431 val_432 +436 val_436 +436 val_437 441 val_442 447 val_448 +448 val_448 +448 val_449 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_469 +468 val_469 +468 val_469 +470 val_470 +470 val_471 475 val_475 475 val_476 481 val_481 @@ -859,6 +1100,13 @@ POSTHOOK: Input: default@srcbucket 489 val_489 489 val_489 489 val_490 +492 val_492 +492 val_492 +492 val_493 +492 val_493 +494 val_494 +494 val_495 +494 val_495 PREHOOK: query: EXPLAIN EXTENDED SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2 on key) s ORDER BY key, value PREHOOK: type: QUERY @@ -899,7 +1147,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -948,7 +1196,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s] + /srcbucket [s] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -996,75 +1244,185 @@ POSTHOOK: Input: default@srcbucket #### A masked pattern was here #### 2 val_2 2 val_3 +3 val_4 6 val_7 6 val_7 10 val_10 10 val_11 +17 val_17 +19 val_19 +19 val_20 20 val_20 20 val_21 20 val_21 +21 val_22 +21 val_22 +21 val_22 +21 val_22 +27 val_27 +29 val_30 +29 val_30 30 val_30 30 val_31 +31 val_32 40 val_41 40 val_41 +41 val_41 +43 val_43 46 val_47 48 val_49 48 val_49 +51 val_51 +51 val_51 +51 val_52 54 val_54 +57 val_57 58 val_58 58 val_58 58 val_59 58 val_59 +59 val_60 60 val_61 +61 val_62 64 val_64 +65 val_65 +65 val_66 +65 val_66 +67 val_67 +67 val_67 68 val_69 +69 val_69 +69 val_70 70 val_70 70 val_70 70 val_70 70 val_71 +77 val_77 +77 val_78 +77 val_78 80 val_80 80 val_81 +83 val_83 +83 val_83 84 val_84 84 val_84 +85 val_85 +85 val_86 86 val_86 86 val_87 +89 val_90 +89 val_90 +89 val_90 90 val_90 90 val_90 90 val_90 +91 val_92 +93 val_94 +93 val_94 +93 val_94 96 val_96 +97 val_97 +97 val_97 +97 val_98 +97 val_98 98 val_98 98 val_98 +99 val_100 +101 val_102 +105 val_105 +105 val_106 +105 val_106 106 val_107 110 val_111 +113 val_113 +113 val_113 116 val_116 116 val_117 +117 val_118 +117 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_120 +119 val_120 +119 val_120 +121 val_122 +121 val_122 +123 val_124 +123 val_124 126 val_126 126 val_127 126 val_127 132 val_133 132 val_133 +133 val_133 +133 val_134 134 val_134 134 val_134 134 val_135 +135 val_136 +135 val_136 +135 val_136 +137 val_137 +137 val_137 +137 val_138 140 val_141 146 val_146 146 val_146 +149 val_149 +149 val_149 +149 val_150 +153 val_153 +153 val_154 +153 val_154 156 val_156 156 val_157 156 val_157 +157 val_157 +157 val_158 +157 val_158 158 val_158 162 val_162 162 val_163 +163 val_163 164 val_164 164 val_164 164 val_165 164 val_165 +165 val_165 +165 val_165 +165 val_166 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +177 val_177 +177 val_178 +177 val_178 178 val_178 178 val_179 178 val_179 +181 val_181 182 val_183 184 val_185 +185 val_186 +187 val_187 +187 val_187 +187 val_187 190 val_190 +191 val_191 +191 val_191 +191 val_192 +195 val_195 +195 val_195 +197 val_197 +197 val_197 +197 val_198 +199 val_199 +199 val_199 +199 val_199 +199 val_200 +201 val_201 202 val_202 206 val_207 206 val_207 @@ -1074,34 +1432,57 @@ POSTHOOK: Input: default@srcbucket 208 val_208 212 val_213 214 val_214 +215 val_216 216 val_216 216 val_216 216 val_217 +221 val_221 +221 val_221 226 val_226 226 val_227 226 val_227 226 val_227 226 val_227 +229 val_229 +229 val_229 +231 val_232 +233 val_233 +233 val_233 +237 val_237 +237 val_237 238 val_238 238 val_238 238 val_239 +239 val_239 +239 val_239 +239 val_240 +239 val_240 240 val_241 +243 val_244 +243 val_244 244 val_244 244 val_245 244 val_245 244 val_245 248 val_248 248 val_249 +249 val_249 +249 val_250 +249 val_250 252 val_252 252 val_253 254 val_255 +255 val_255 +255 val_255 256 val_256 256 val_256 256 val_257 260 val_260 260 val_261 260 val_261 +261 val_262 266 val_266 +271 val_272 272 val_272 272 val_272 272 val_273 @@ -1111,10 +1492,20 @@ POSTHOOK: Input: default@srcbucket 284 val_285 286 val_286 286 val_287 +287 val_287 +287 val_288 +287 val_288 +289 val_289 +289 val_290 +291 val_291 +291 val_292 +291 val_292 292 val_292 292 val_293 292 val_293 304 val_305 +307 val_307 +307 val_307 308 val_308 308 val_309 308 val_309 @@ -1122,37 +1513,81 @@ POSTHOOK: Input: default@srcbucket 310 val_311 310 val_311 310 val_311 +311 val_311 +311 val_311 +311 val_311 +313 val_314 +315 val_315 316 val_316 316 val_316 316 val_316 +317 val_317 +317 val_317 +317 val_318 324 val_325 +325 val_325 +325 val_325 326 val_327 +327 val_327 +327 val_327 +327 val_327 332 val_332 334 val_335 336 val_336 336 val_337 +337 val_338 338 val_338 338 val_339 +339 val_339 +341 val_341 +341 val_342 +341 val_342 +341 val_342 342 val_342 342 val_342 342 val_343 +343 val_344 344 val_344 344 val_344 344 val_345 +347 val_348 +347 val_348 348 val_348 348 val_348 348 val_348 348 val_348 348 val_348 348 val_349 +349 val_350 +349 val_350 +349 val_350 +349 val_350 +351 val_351 +351 val_352 +351 val_352 352 val_353 352 val_353 +353 val_353 +353 val_353 +353 val_354 +355 val_356 +355 val_356 360 val_360 360 val_361 362 val_362 364 val_364 364 val_365 +365 val_365 368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_370 +371 val_372 +371 val_372 +371 val_372 +371 val_372 +377 val_377 378 val_378 378 val_379 384 val_384 @@ -1165,13 +1600,24 @@ POSTHOOK: Input: default@srcbucket 386 val_387 386 val_387 388 val_389 +391 val_392 +391 val_392 392 val_392 392 val_393 392 val_393 +393 val_393 +393 val_394 +393 val_394 394 val_394 396 val_396 396 val_396 396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_400 +399 val_400 402 val_402 402 val_403 402 val_403 @@ -1181,13 +1627,36 @@ POSTHOOK: Input: default@srcbucket 404 val_405 404 val_405 404 val_405 +407 val_407 +407 val_408 +407 val_408 +407 val_408 408 val_409 408 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_410 +409 val_410 410 val_411 +411 val_411 +411 val_412 414 val_414 414 val_414 414 val_415 +417 val_417 +417 val_417 +417 val_417 +419 val_419 +421 val_421 +421 val_422 +421 val_422 +423 val_424 +425 val_426 426 val_427 +427 val_427 +427 val_428 +427 val_428 428 val_429 430 val_430 430 val_430 @@ -1195,13 +1664,24 @@ POSTHOOK: Input: default@srcbucket 430 val_431 432 val_432 432 val_433 +435 val_435 +435 val_436 +437 val_437 +437 val_438 440 val_441 440 val_441 +443 val_443 +443 val_444 +443 val_444 +443 val_444 444 val_444 446 val_446 446 val_447 446 val_447 +449 val_449 452 val_452 +453 val_453 +453 val_454 454 val_454 454 val_454 454 val_454 @@ -1209,10 +1689,19 @@ POSTHOOK: Input: default@srcbucket 454 val_455 458 val_458 458 val_458 +459 val_459 +459 val_459 +459 val_460 +463 val_463 +463 val_463 +463 val_464 466 val_466 466 val_466 466 val_466 +467 val_467 +467 val_468 472 val_472 +473 val_474 474 val_475 474 val_475 476 val_477 @@ -1221,6 +1710,7 @@ POSTHOOK: Input: default@srcbucket 478 val_478 478 val_479 478 val_479 +479 val_479 480 val_480 480 val_480 480 val_480 @@ -1228,11 +1718,23 @@ POSTHOOK: Input: default@srcbucket 480 val_481 482 val_482 482 val_483 +483 val_483 484 val_484 484 val_485 +485 val_485 +485 val_486 +485 val_486 488 val_489 490 val_490 490 val_491 +491 val_491 +491 val_492 +491 val_492 +495 val_495 +495 val_496 +497 val_497 +497 val_498 +497 val_498 498 val_498 498 val_498 498 val_498 @@ -2239,57 +2741,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count 4 - bucket_field_name key - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket2 - numFiles 4 - numRows 500 - rawDataSize 5312 - serialization.ddl struct srcbucket2 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count 4 - bucket_field_name key - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket2 - numFiles 4 - numRows 500 - rawDataSize 5312 - serialization.ddl struct srcbucket2 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket2 - name: default.srcbucket2 -#### A masked pattern was here #### - Partition - base file name: 000002_0 + base file name: srcbucket2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -2338,8 +2790,7 @@ STAGE PLANS: name: default.srcbucket2 name: default.srcbucket2 Truncated Path -> Alias: - /srcbucket2/000000_0 [s] - /srcbucket2/000002_0 [s] + /srcbucket2 [s] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -2387,38 +2838,91 @@ POSTHOOK: Input: default@srcbucket2 #### A masked pattern was here #### 2 val_2 10 val_10 +17 val_17 +19 val_19 20 val_20 +27 val_27 30 val_30 +41 val_41 +43 val_43 +51 val_51 +51 val_51 54 val_54 +57 val_57 58 val_58 58 val_58 64 val_64 +65 val_65 +67 val_67 +67 val_67 +69 val_69 70 val_70 70 val_70 70 val_70 +77 val_77 80 val_80 +83 val_83 +83 val_83 84 val_84 84 val_84 +85 val_85 86 val_86 90 val_90 90 val_90 90 val_90 96 val_96 +97 val_97 +97 val_97 98 val_98 98 val_98 +105 val_105 +113 val_113 +113 val_113 116 val_116 +119 val_119 +119 val_119 +119 val_119 126 val_126 +133 val_133 134 val_134 134 val_134 +137 val_137 +137 val_137 146 val_146 146 val_146 +149 val_149 +149 val_149 +153 val_153 156 val_156 +157 val_157 158 val_158 162 val_162 +163 val_163 164 val_164 164 val_164 +165 val_165 +165 val_165 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +177 val_177 178 val_178 +181 val_181 +187 val_187 +187 val_187 +187 val_187 190 val_190 +191 val_191 +191 val_191 +195 val_195 +195 val_195 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +201 val_201 202 val_202 208 val_208 208 val_208 @@ -2426,12 +2930,25 @@ POSTHOOK: Input: default@srcbucket2 214 val_214 216 val_216 216 val_216 +221 val_221 +221 val_221 226 val_226 +229 val_229 +229 val_229 +233 val_233 +233 val_233 +237 val_237 +237 val_237 238 val_238 238 val_238 +239 val_239 +239 val_239 244 val_244 248 val_248 +249 val_249 252 val_252 +255 val_255 +255 val_255 256 val_256 256 val_256 260 val_260 @@ -2440,15 +2957,33 @@ POSTHOOK: Input: default@srcbucket2 272 val_272 284 val_284 286 val_286 +287 val_287 +289 val_289 +291 val_291 292 val_292 +307 val_307 +307 val_307 308 val_308 310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 316 val_316 316 val_316 316 val_316 +317 val_317 +317 val_317 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 332 val_332 336 val_336 338 val_338 +339 val_339 +341 val_341 342 val_342 342 val_342 344 val_344 @@ -2458,49 +2993,89 @@ POSTHOOK: Input: default@srcbucket2 348 val_348 348 val_348 348 val_348 +351 val_351 +353 val_353 +353 val_353 360 val_360 362 val_362 364 val_364 +365 val_365 368 val_368 +369 val_369 +369 val_369 +369 val_369 +377 val_377 378 val_378 384 val_384 384 val_384 384 val_384 386 val_386 392 val_392 +393 val_393 394 val_394 396 val_396 396 val_396 396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 402 val_402 404 val_404 404 val_404 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +411 val_411 414 val_414 414 val_414 +417 val_417 +417 val_417 +417 val_417 +419 val_419 +421 val_421 +427 val_427 430 val_430 430 val_430 430 val_430 432 val_432 +435 val_435 +437 val_437 +443 val_443 444 val_444 446 val_446 +449 val_449 452 val_452 +453 val_453 454 val_454 454 val_454 454 val_454 458 val_458 458 val_458 +459 val_459 +459 val_459 +463 val_463 +463 val_463 466 val_466 466 val_466 466 val_466 +467 val_467 472 val_472 478 val_478 478 val_478 +479 val_479 480 val_480 480 val_480 480 val_480 482 val_482 +483 val_483 484 val_484 +485 val_485 490 val_490 +491 val_491 +495 val_495 +497 val_497 498 val_498 498 val_498 498 val_498 @@ -2544,7 +3119,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: srcbucket2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -2593,7 +3168,7 @@ STAGE PLANS: name: default.srcbucket2 name: default.srcbucket2 Truncated Path -> Alias: - /srcbucket2/000001_0 [s] + /srcbucket2 [s] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -2639,38 +3214,132 @@ ORDER BY key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket2 #### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +4 val_4 +8 val_8 9 val_9 +11 val_11 +18 val_18 +18 val_18 +26 val_26 +26 val_26 +34 val_34 37 val_37 37 val_37 +66 val_66 +72 val_72 +72 val_72 +74 val_74 +78 val_78 +82 val_82 +92 val_92 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +118 val_118 +118 val_118 +120 val_120 +120 val_120 +128 val_128 +128 val_128 +128 val_128 +131 val_131 +136 val_136 +143 val_143 +150 val_150 +152 val_152 +152 val_152 +155 val_155 +160 val_160 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +179 val_179 +179 val_179 +183 val_183 +186 val_186 189 val_189 +194 val_194 +200 val_200 +200 val_200 205 val_205 205 val_205 217 val_217 217 val_217 +218 val_218 +222 val_222 +223 val_223 +223 val_223 +228 val_228 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +235 val_235 +247 val_247 +262 val_262 +275 val_275 277 val_277 277 val_277 277 val_277 277 val_277 +280 val_280 +280 val_280 +282 val_282 +282 val_282 285 val_285 +288 val_288 +288 val_288 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +302 val_302 305 val_305 +306 val_306 309 val_309 309 val_309 321 val_321 321 val_321 +323 val_323 333 val_333 333 val_333 345 val_345 +366 val_366 +374 val_374 +375 val_375 389 val_389 +400 val_400 413 val_413 413 val_413 +424 val_424 +424 val_424 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +455 val_455 457 val_457 +460 val_460 +462 val_462 +462 val_462 469 val_469 469 val_469 469 val_469 469 val_469 469 val_469 477 val_477 +487 val_487 493 val_493 +496 val_496 PREHOOK: query: CREATE TABLE empty_bucket (key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -2714,6 +3383,61 @@ STAGE PLANS: tag: -1 auto parallelism: false Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: empty_bucket + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count 2 + bucket_field_name key + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.empty_bucket + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct empty_bucket { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count 2 + bucket_field_name key + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.empty_bucket + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct empty_bucket { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.empty_bucket + name: default.empty_bucket + Truncated Path -> Alias: + /empty_bucket [s] Needs Tagging: false Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/sample7.q.out b/ql/src/test/results/clientpositive/sample7.q.out index 87630411a7..8df78fe264 100644 --- a/ql/src/test/results/clientpositive/sample7.q.out +++ b/ql/src/test/results/clientpositive/sample7.q.out @@ -96,7 +96,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -145,7 +145,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s] + /srcbucket [s] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -434,33 +434,86 @@ order by key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1_n160 #### A masked pattern was here #### +105 val_105 +105 val_106 +105 val_106 110 val_111 +113 val_113 +113 val_113 116 val_116 116 val_117 +117 val_118 +117 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_120 +119 val_120 +119 val_120 +121 val_122 +121 val_122 +123 val_124 +123 val_124 126 val_126 126 val_127 126 val_127 134 val_134 134 val_134 134 val_135 +137 val_137 +137 val_137 +137 val_138 +153 val_153 +153 val_154 +153 val_154 156 val_156 156 val_157 156 val_157 +157 val_157 +157 val_158 +157 val_158 158 val_158 +163 val_163 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +177 val_177 +177 val_178 +177 val_178 178 val_178 178 val_179 178 val_179 184 val_185 +187 val_187 +187 val_187 +187 val_187 +195 val_195 +195 val_195 +197 val_197 +197 val_197 +197 val_198 206 val_207 206 val_207 206 val_207 208 val_208 208 val_208 208 val_208 +221 val_221 +221 val_221 +229 val_229 +229 val_229 +237 val_237 +237 val_237 +243 val_244 +243 val_244 244 val_244 244 val_245 244 val_245 244 val_245 +249 val_249 +249 val_250 +249 val_250 252 val_252 252 val_253 254 val_255 @@ -468,27 +521,40 @@ POSTHOOK: Input: default@dest1_n160 256 val_256 256 val_257 266 val_266 +271 val_272 272 val_272 272 val_272 272 val_273 286 val_286 286 val_287 +289 val_289 +289 val_290 292 val_292 292 val_293 292 val_293 304 val_305 +307 val_307 +307 val_307 308 val_308 308 val_309 308 val_309 +315 val_315 316 val_316 316 val_316 316 val_316 +317 val_317 +317 val_317 +317 val_318 326 val_327 +327 val_327 +327 val_327 +327 val_327 334 val_335 336 val_336 336 val_337 338 val_338 338 val_339 +339 val_339 342 val_342 342 val_342 342 val_343 @@ -501,21 +567,45 @@ POSTHOOK: Input: default@dest1_n160 348 val_348 348 val_348 348 val_349 +349 val_350 +349 val_350 +349 val_350 +349 val_350 352 val_353 352 val_353 +353 val_353 +353 val_353 +353 val_354 +355 val_356 +355 val_356 360 val_360 360 val_361 362 val_362 364 val_364 364 val_365 +369 val_369 +369 val_369 +369 val_369 +369 val_370 +371 val_372 +371 val_372 +371 val_372 +371 val_372 +377 val_377 378 val_378 378 val_379 +391 val_392 +391 val_392 392 val_392 392 val_393 392 val_393 396 val_396 396 val_396 396 val_396 +399 val_399 +399 val_399 +399 val_400 +399 val_400 402 val_402 402 val_403 402 val_403 @@ -525,15 +615,31 @@ POSTHOOK: Input: default@dest1_n160 404 val_405 404 val_405 404 val_405 +407 val_407 +407 val_408 +407 val_408 +407 val_408 408 val_409 408 val_409 410 val_411 +417 val_417 +417 val_417 +417 val_417 +419 val_419 +423 val_424 426 val_427 +427 val_427 +427 val_428 +427 val_428 440 val_441 440 val_441 +449 val_449 452 val_452 458 val_458 458 val_458 +463 val_463 +463 val_463 +463 val_464 466 val_466 466 val_466 466 val_466 @@ -544,7 +650,11 @@ POSTHOOK: Input: default@dest1_n160 478 val_478 478 val_479 478 val_479 +479 val_479 482 val_482 482 val_483 484 val_484 484 val_485 +497 val_497 +497 val_498 +497 val_498 diff --git a/ql/src/test/results/clientpositive/sample9.q.out b/ql/src/test/results/clientpositive/sample9.q.out index 0de49a698a..4819dc1353 100644 --- a/ql/src/test/results/clientpositive/sample9.q.out +++ b/ql/src/test/results/clientpositive/sample9.q.out @@ -55,7 +55,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -104,7 +104,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s:a] + /srcbucket [s:a] Stage: Stage-0 Fetch Operator @@ -126,75 +126,185 @@ POSTHOOK: Input: default@srcbucket #### A masked pattern was here #### 2 val_2 2 val_3 +3 val_4 6 val_7 6 val_7 10 val_10 10 val_11 +17 val_17 +19 val_19 +19 val_20 20 val_20 20 val_21 20 val_21 +21 val_22 +21 val_22 +21 val_22 +21 val_22 +27 val_27 +29 val_30 +29 val_30 30 val_30 30 val_31 +31 val_32 40 val_41 40 val_41 +41 val_41 +43 val_43 46 val_47 48 val_49 48 val_49 +51 val_51 +51 val_51 +51 val_52 54 val_54 +57 val_57 58 val_58 58 val_58 58 val_59 58 val_59 +59 val_60 60 val_61 +61 val_62 64 val_64 +65 val_65 +65 val_66 +65 val_66 +67 val_67 +67 val_67 68 val_69 +69 val_69 +69 val_70 70 val_70 70 val_70 70 val_70 70 val_71 +77 val_77 +77 val_78 +77 val_78 80 val_80 80 val_81 +83 val_83 +83 val_83 84 val_84 84 val_84 +85 val_85 +85 val_86 86 val_86 86 val_87 +89 val_90 +89 val_90 +89 val_90 90 val_90 90 val_90 90 val_90 +91 val_92 +93 val_94 +93 val_94 +93 val_94 96 val_96 +97 val_97 +97 val_97 +97 val_98 +97 val_98 98 val_98 98 val_98 +99 val_100 +101 val_102 +105 val_105 +105 val_106 +105 val_106 106 val_107 110 val_111 +113 val_113 +113 val_113 116 val_116 116 val_117 +117 val_118 +117 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_120 +119 val_120 +119 val_120 +121 val_122 +121 val_122 +123 val_124 +123 val_124 126 val_126 126 val_127 126 val_127 132 val_133 132 val_133 +133 val_133 +133 val_134 134 val_134 134 val_134 134 val_135 +135 val_136 +135 val_136 +135 val_136 +137 val_137 +137 val_137 +137 val_138 140 val_141 146 val_146 146 val_146 +149 val_149 +149 val_149 +149 val_150 +153 val_153 +153 val_154 +153 val_154 156 val_156 156 val_157 156 val_157 +157 val_157 +157 val_158 +157 val_158 158 val_158 162 val_162 162 val_163 +163 val_163 164 val_164 164 val_164 164 val_165 164 val_165 +165 val_165 +165 val_165 +165 val_166 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +177 val_177 +177 val_178 +177 val_178 178 val_178 178 val_179 178 val_179 +181 val_181 182 val_183 184 val_185 +185 val_186 +187 val_187 +187 val_187 +187 val_187 190 val_190 +191 val_191 +191 val_191 +191 val_192 +195 val_195 +195 val_195 +197 val_197 +197 val_197 +197 val_198 +199 val_199 +199 val_199 +199 val_199 +199 val_200 +201 val_201 202 val_202 206 val_207 206 val_207 @@ -204,34 +314,57 @@ POSTHOOK: Input: default@srcbucket 208 val_208 212 val_213 214 val_214 +215 val_216 216 val_216 216 val_216 216 val_217 +221 val_221 +221 val_221 226 val_226 226 val_227 226 val_227 226 val_227 226 val_227 +229 val_229 +229 val_229 +231 val_232 +233 val_233 +233 val_233 +237 val_237 +237 val_237 238 val_238 238 val_238 238 val_239 +239 val_239 +239 val_239 +239 val_240 +239 val_240 240 val_241 +243 val_244 +243 val_244 244 val_244 244 val_245 244 val_245 244 val_245 248 val_248 248 val_249 +249 val_249 +249 val_250 +249 val_250 252 val_252 252 val_253 254 val_255 +255 val_255 +255 val_255 256 val_256 256 val_256 256 val_257 260 val_260 260 val_261 260 val_261 +261 val_262 266 val_266 +271 val_272 272 val_272 272 val_272 272 val_273 @@ -241,10 +374,20 @@ POSTHOOK: Input: default@srcbucket 284 val_285 286 val_286 286 val_287 +287 val_287 +287 val_288 +287 val_288 +289 val_289 +289 val_290 +291 val_291 +291 val_292 +291 val_292 292 val_292 292 val_293 292 val_293 304 val_305 +307 val_307 +307 val_307 308 val_308 308 val_309 308 val_309 @@ -252,37 +395,81 @@ POSTHOOK: Input: default@srcbucket 310 val_311 310 val_311 310 val_311 +311 val_311 +311 val_311 +311 val_311 +313 val_314 +315 val_315 316 val_316 316 val_316 316 val_316 +317 val_317 +317 val_317 +317 val_318 324 val_325 +325 val_325 +325 val_325 326 val_327 +327 val_327 +327 val_327 +327 val_327 332 val_332 334 val_335 336 val_336 336 val_337 +337 val_338 338 val_338 338 val_339 +339 val_339 +341 val_341 +341 val_342 +341 val_342 +341 val_342 342 val_342 342 val_342 342 val_343 +343 val_344 344 val_344 344 val_344 344 val_345 +347 val_348 +347 val_348 348 val_348 348 val_348 348 val_348 348 val_348 348 val_348 348 val_349 +349 val_350 +349 val_350 +349 val_350 +349 val_350 +351 val_351 +351 val_352 +351 val_352 352 val_353 352 val_353 +353 val_353 +353 val_353 +353 val_354 +355 val_356 +355 val_356 360 val_360 360 val_361 362 val_362 364 val_364 364 val_365 +365 val_365 368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_370 +371 val_372 +371 val_372 +371 val_372 +371 val_372 +377 val_377 378 val_378 378 val_379 384 val_384 @@ -295,13 +482,24 @@ POSTHOOK: Input: default@srcbucket 386 val_387 386 val_387 388 val_389 +391 val_392 +391 val_392 392 val_392 392 val_393 392 val_393 +393 val_393 +393 val_394 +393 val_394 394 val_394 396 val_396 396 val_396 396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_400 +399 val_400 402 val_402 402 val_403 402 val_403 @@ -311,13 +509,36 @@ POSTHOOK: Input: default@srcbucket 404 val_405 404 val_405 404 val_405 +407 val_407 +407 val_408 +407 val_408 +407 val_408 408 val_409 408 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_410 +409 val_410 410 val_411 +411 val_411 +411 val_412 414 val_414 414 val_414 414 val_415 +417 val_417 +417 val_417 +417 val_417 +419 val_419 +421 val_421 +421 val_422 +421 val_422 +423 val_424 +425 val_426 426 val_427 +427 val_427 +427 val_428 +427 val_428 428 val_429 430 val_430 430 val_430 @@ -325,13 +546,24 @@ POSTHOOK: Input: default@srcbucket 430 val_431 432 val_432 432 val_433 +435 val_435 +435 val_436 +437 val_437 +437 val_438 440 val_441 440 val_441 +443 val_443 +443 val_444 +443 val_444 +443 val_444 444 val_444 446 val_446 446 val_447 446 val_447 +449 val_449 452 val_452 +453 val_453 +453 val_454 454 val_454 454 val_454 454 val_454 @@ -339,10 +571,19 @@ POSTHOOK: Input: default@srcbucket 454 val_455 458 val_458 458 val_458 +459 val_459 +459 val_459 +459 val_460 +463 val_463 +463 val_463 +463 val_464 466 val_466 466 val_466 466 val_466 +467 val_467 +467 val_468 472 val_472 +473 val_474 474 val_475 474 val_475 476 val_477 @@ -351,6 +592,7 @@ POSTHOOK: Input: default@srcbucket 478 val_478 478 val_479 478 val_479 +479 val_479 480 val_480 480 val_480 480 val_480 @@ -358,11 +600,23 @@ POSTHOOK: Input: default@srcbucket 480 val_481 482 val_482 482 val_483 +483 val_483 484 val_484 484 val_485 +485 val_485 +485 val_486 +485 val_486 488 val_489 490 val_490 490 val_491 +491 val_491 +491 val_492 +491 val_492 +495 val_495 +495 val_496 +497 val_497 +497 val_498 +497 val_498 498 val_498 498 val_498 498 val_498 diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index d35a2077b4..b2de1d0e79 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -1931,7 +1931,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: ds=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1981,7 +1981,7 @@ STAGE PLANS: name: default.test_table1_n1 name: default.test_table1_n1 Truncated Path -> Alias: - /test_table1_n1/ds=1/000001_0 [test_table1_n1] + /test_table1_n1/ds=1 [test_table1_n1] Stage: Stage-0 Fetch Operator @@ -2042,7 +2042,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: ds=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -2091,7 +2091,7 @@ STAGE PLANS: name: default.test_table3_n1 name: default.test_table3_n1 Truncated Path -> Alias: - /test_table3_n1/ds=1/000001_0 [test_table3_n1] + /test_table3_n1/ds=1 [test_table3_n1] Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out index 8fab7ecbd0..7a2407c4b3 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out @@ -67,7 +67,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table_bucketed POSTHOOK: Input: default@test_table_bucketed@part=1 POSTHOOK: Output: hdfs://### HDFS PATH ### -0 +177 PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table_bucketed diff --git a/ql/src/test/results/clientpositive/spark/sample10.q.out b/ql/src/test/results/clientpositive/spark/sample10.q.out index ac28779591..6f1fe95d66 100644 --- a/ql/src/test/results/clientpositive/spark/sample10.q.out +++ b/ql/src/test/results/clientpositive/spark/sample10.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -138,7 +138,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -189,7 +189,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -240,7 +240,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -290,10 +290,10 @@ STAGE PLANS: name: default.srcpartbucket name: default.srcpartbucket Truncated Path -> Alias: - /srcpartbucket/ds=2008-04-08/hr=11/000001_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-08/hr=12/000001_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-09/hr=11/000001_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-09/hr=12/000001_0 [srcpartbucket] + /srcpartbucket/ds=2008-04-08/hr=11 [srcpartbucket] + /srcpartbucket/ds=2008-04-08/hr=12 [srcpartbucket] + /srcpartbucket/ds=2008-04-09/hr=11 [srcpartbucket] + /srcpartbucket/ds=2008-04-09/hr=12 [srcpartbucket] Reducer 2 Execution mode: vectorized Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/spark/sample2.q.out b/ql/src/test/results/clientpositive/spark/sample2.q.out index 8b73fdf874..185253ab25 100644 --- a/ql/src/test/results/clientpositive/spark/sample2.q.out +++ b/ql/src/test/results/clientpositive/spark/sample2.q.out @@ -78,7 +78,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -127,7 +127,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s] + /srcbucket [s] Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/sample4.q.out b/ql/src/test/results/clientpositive/spark/sample4.q.out index 3269b015ec..86fb0f04e0 100644 --- a/ql/src/test/results/clientpositive/spark/sample4.q.out +++ b/ql/src/test/results/clientpositive/spark/sample4.q.out @@ -78,7 +78,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -127,7 +127,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s] + /srcbucket [s] Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/sample6.q.out b/ql/src/test/results/clientpositive/spark/sample6.q.out index 36532d7fbe..153f0fd4a8 100644 --- a/ql/src/test/results/clientpositive/spark/sample6.q.out +++ b/ql/src/test/results/clientpositive/spark/sample6.q.out @@ -78,7 +78,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -127,7 +127,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s] + /srcbucket [s] Stage: Stage-0 Move Operator @@ -499,7 +499,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -548,7 +548,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000001_0 [s] + /srcbucket [s] Reducer 2 Execution mode: vectorized Needs Tagging: false @@ -913,7 +913,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -962,7 +962,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s] + /srcbucket [s] Reducer 2 Execution mode: vectorized Needs Tagging: false @@ -2528,57 +2528,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count 4 - bucket_field_name key - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket2 - numFiles 4 - numRows 500 - rawDataSize 5312 - serialization.ddl struct srcbucket2 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count 4 - bucket_field_name key - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.srcbucket2 - numFiles 4 - numRows 500 - rawDataSize 5312 - serialization.ddl struct srcbucket2 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket2 - name: default.srcbucket2 -#### A masked pattern was here #### - Partition - base file name: 000002_0 + base file name: srcbucket2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -2627,8 +2577,7 @@ STAGE PLANS: name: default.srcbucket2 name: default.srcbucket2 Truncated Path -> Alias: - /srcbucket2/000000_0 [s] - /srcbucket2/000002_0 [s] + /srcbucket2 [s] Reducer 2 Execution mode: vectorized Needs Tagging: false @@ -2964,7 +2913,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: srcbucket2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -3013,7 +2962,7 @@ STAGE PLANS: name: default.srcbucket2 name: default.srcbucket2 Truncated Path -> Alias: - /srcbucket2/000001_0 [s] + /srcbucket2 [s] Reducer 2 Execution mode: vectorized Needs Tagging: false @@ -3235,6 +3184,61 @@ STAGE PLANS: tag: -1 auto parallelism: false Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: empty_bucket + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count 2 + bucket_field_name key + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.empty_bucket + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct empty_bucket { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count 2 + bucket_field_name key + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.empty_bucket + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct empty_bucket { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.empty_bucket + name: default.empty_bucket + Truncated Path -> Alias: + /empty_bucket [s] Reducer 2 Execution mode: vectorized Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/spark/sample7.q.out b/ql/src/test/results/clientpositive/spark/sample7.q.out index f7e025e5b3..f33867e8fd 100644 --- a/ql/src/test/results/clientpositive/spark/sample7.q.out +++ b/ql/src/test/results/clientpositive/spark/sample7.q.out @@ -79,7 +79,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000000_0 + base file name: srcbucket input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -128,7 +128,7 @@ STAGE PLANS: name: default.srcbucket name: default.srcbucket Truncated Path -> Alias: - /srcbucket/000000_0 [s] + /srcbucket [s] Stage: Stage-0 Move Operator