diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 9dbd869d57..136709c6dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -379,7 +379,18 @@ public Path getBucketPath(int bucketNum) { if (srcs == null) { return null; } - return srcs[bucketNum].getPath(); + + // Compute bucketid from srcs and return the 1st match. + for (FileStatus src : srcs) { + String bucketName = src.getPath().getName(); + String bucketIdStr = Utilities.getBucketFileNameFromPathSubString(bucketName); + int bucketId = Utilities.getBucketIdFromFile(bucketIdStr); + if (bucketId == bucketNum) { + // match, return + return src.getPath(); + } + } + return null; } @SuppressWarnings("nls") diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java index 8200e6a237..75bce638a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Stack; +import org.apache.hadoop.hive.metastore.TableType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; @@ -192,8 +193,8 @@ public static NodeProcessor getDefaultProc() { // check if input pruning is possible // TODO: this code is buggy - it relies on having one file per bucket; no MM support (by design). - boolean isMmTable = AcidUtils.isInsertOnlyTable(part.getTable().getParameters()); - if (sampleDescr.getInputPruning() && !isMmTable) { + boolean isManagedTable = part.getTable().getTableType() == TableType.MANAGED_TABLE; + if (sampleDescr.getInputPruning() && !isManagedTable) { LOG.trace("numerator = " + num); LOG.trace("denominator = " + den); LOG.trace("bucket count = " + bucketCount); @@ -220,7 +221,7 @@ public static NodeProcessor getDefaultProc() { } } else { // need to do full scan - fullScanMsg = isMmTable ? "MM table" : "Tablesample not on clustered columns"; + fullScanMsg = isManagedTable ? "Managed table" : "Tablesample not on clustered columns"; } LOG.warn(fullScanMsg + ", using full table scan"); Path[] ret = part.getPath(); diff --git a/ql/src/test/queries/clientpositive/sample10_mm.q b/ql/src/test/queries/clientpositive/sample10_mm.q new file mode 100644 index 0000000000..f653e67875 --- /dev/null +++ b/ql/src/test/queries/clientpositive/sample10_mm.q @@ -0,0 +1,34 @@ +--! qt:dataset:srcpart +set hive.mapred.mode=nonstrict; +set hive.exec.submitviachild=false; +set hive.exec.submit.local.task.via.child=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +set hive.exec.reducers.max=4; +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +set hive.default.fileformat=RCFILE; +set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables,org.apache.hadoop.hive.ql.hooks.UpdateInputAccessTimeHook$PreExec; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); + +insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10; + + +select * from srcpartbucket; +explain select key from srcpartbucket tablesample (bucket 2 out of 4 on key); +select key from srcpartbucket tablesample (bucket 1 out of 4 on key); +select key from srcpartbucket tablesample (bucket 2 out of 4 on key); +select key from srcpartbucket tablesample (bucket 3 out of 4 on key); +select key from srcpartbucket tablesample (bucket 4 out of 4 on key); + +explain + select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key; +select key from srcpartbucket tablesample (bucket 1 out of 4 on key) group by key; +select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key; +select key from srcpartbucket tablesample (bucket 3 out of 4 on key) group by key; +select key from srcpartbucket tablesample (bucket 4 out of 4 on key) group by key; + diff --git a/ql/src/test/results/clientpositive/llap/sample10.q.out b/ql/src/test/results/clientpositive/llap/sample10.q.out index 1b95314980..cbceadca02 100644 --- a/ql/src/test/results/clientpositive/llap/sample10.q.out +++ b/ql/src/test/results/clientpositive/llap/sample10.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000002_0 + base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -140,7 +140,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000002_0 + base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -191,7 +191,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000002_0 + base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -242,7 +242,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000002_0 + base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -292,10 +292,10 @@ STAGE PLANS: name: default.srcpartbucket name: default.srcpartbucket Truncated Path -> Alias: - /srcpartbucket/ds=2008-04-08/hr=11/000002_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-08/hr=12/000002_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-09/hr=11/000002_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-09/hr=12/000002_0 [srcpartbucket] + /srcpartbucket/ds=2008-04-08/hr=11 [srcpartbucket] + /srcpartbucket/ds=2008-04-08/hr=12 [srcpartbucket] + /srcpartbucket/ds=2008-04-09/hr=11 [srcpartbucket] + /srcpartbucket/ds=2008-04-09/hr=12 [srcpartbucket] Reducer 2 Execution mode: vectorized, llap Needs Tagging: false @@ -367,6 +367,8 @@ POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 #### A masked pattern was here #### +2008-04-08 14 +2008-04-09 14 PREHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds ORDER BY ds ASC PREHOOK: type: QUERY PREHOOK: Input: default@srcpartbucket @@ -383,6 +385,8 @@ POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 #### A masked pattern was here #### +2008-04-08 4 +2008-04-09 4 PREHOOK: query: select * from srcpartbucket where ds is not null ORDER BY key ASC, value ASC, ds ASC, hr ASC PREHOOK: type: QUERY PREHOOK: Input: default@srcpartbucket diff --git a/ql/src/test/results/clientpositive/llap/sample10_mm.q.out b/ql/src/test/results/clientpositive/llap/sample10_mm.q.out new file mode 100644 index 0000000000..28d0cd052f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/sample10_mm.q.out @@ -0,0 +1,346 @@ +PREHOOK: query: create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpartbucket +POSTHOOK: query: create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpartbucket +PREHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpartbucket +POSTHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpartbucket@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from srcpartbucket +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpartbucket +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5 val_5 2008-04-08 11 +2 val_2 2008-04-08 11 +5 val_5 2008-04-08 11 +5 val_5 2008-04-08 11 +0 val_0 2008-04-08 11 +0 val_0 2008-04-08 11 +0 val_0 2008-04-08 11 +9 val_9 2008-04-08 11 +8 val_8 2008-04-08 11 +4 val_4 2008-04-08 11 +5 val_5 2008-04-08 12 +2 val_2 2008-04-08 12 +5 val_5 2008-04-08 12 +5 val_5 2008-04-08 12 +0 val_0 2008-04-08 12 +0 val_0 2008-04-08 12 +0 val_0 2008-04-08 12 +9 val_9 2008-04-08 12 +8 val_8 2008-04-08 12 +4 val_4 2008-04-08 12 +5 val_5 2008-04-09 11 +2 val_2 2008-04-09 11 +5 val_5 2008-04-09 11 +5 val_5 2008-04-09 11 +0 val_0 2008-04-09 11 +0 val_0 2008-04-09 11 +0 val_0 2008-04-09 11 +9 val_9 2008-04-09 11 +8 val_8 2008-04-09 11 +4 val_4 2008-04-09 11 +5 val_5 2008-04-09 12 +2 val_2 2008-04-09 12 +5 val_5 2008-04-09 12 +5 val_5 2008-04-09 12 +0 val_0 2008-04-09 12 +0 val_0 2008-04-09 12 +0 val_0 2008-04-09 12 +9 val_9 2008-04-09 12 +8 val_8 2008-04-09 12 +4 val_4 2008-04-09 12 +PREHOOK: query: explain select key from srcpartbucket tablesample (bucket 2 out of 4 on key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from srcpartbucket tablesample (bucket 2 out of 4 on key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpartbucket + Filter Operator + predicate: (((hash(key) & 2147483647) % 4) = 1) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key from srcpartbucket tablesample (bucket 1 out of 4 on key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 1 out of 4 on key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: select key from srcpartbucket tablesample (bucket 2 out of 4 on key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 2 out of 4 on key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5 +2 +5 +5 +0 +0 +0 +5 +2 +5 +5 +0 +0 +0 +5 +2 +5 +5 +0 +0 +0 +5 +2 +5 +5 +0 +0 +0 +PREHOOK: query: select key from srcpartbucket tablesample (bucket 3 out of 4 on key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 3 out of 4 on key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +9 +8 +9 +8 +9 +8 +9 +8 +PREHOOK: query: select key from srcpartbucket tablesample (bucket 4 out of 4 on key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 4 out of 4 on key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +4 +4 +4 +4 +PREHOOK: query: explain + select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain + select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpartbucket + Statistics: Num rows: 40 Data size: 3400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((hash(key) & 2147483647) % 4) = 1) (type: boolean) + Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from srcpartbucket tablesample (bucket 1 out of 4 on key) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 1 out of 4 on key) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 2 out of 4 on key) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5 +0 +2 +PREHOOK: query: select key from srcpartbucket tablesample (bucket 3 out of 4 on key) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 3 out of 4 on key) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +9 +8 +PREHOOK: query: select key from srcpartbucket tablesample (bucket 4 out of 4 on key) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpartbucket +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpartbucket tablesample (bucket 4 out of 4 on key) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpartbucket +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +4 diff --git a/ql/src/test/results/clientpositive/spark/sample10.q.out b/ql/src/test/results/clientpositive/spark/sample10.q.out index ac28779591..6f1fe95d66 100644 --- a/ql/src/test/results/clientpositive/spark/sample10.q.out +++ b/ql/src/test/results/clientpositive/spark/sample10.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -138,7 +138,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -189,7 +189,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -240,7 +240,7 @@ STAGE PLANS: name: default.srcpartbucket #### A masked pattern was here #### Partition - base file name: 000001_0 + base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -290,10 +290,10 @@ STAGE PLANS: name: default.srcpartbucket name: default.srcpartbucket Truncated Path -> Alias: - /srcpartbucket/ds=2008-04-08/hr=11/000001_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-08/hr=12/000001_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-09/hr=11/000001_0 [srcpartbucket] - /srcpartbucket/ds=2008-04-09/hr=12/000001_0 [srcpartbucket] + /srcpartbucket/ds=2008-04-08/hr=11 [srcpartbucket] + /srcpartbucket/ds=2008-04-08/hr=12 [srcpartbucket] + /srcpartbucket/ds=2008-04-09/hr=11 [srcpartbucket] + /srcpartbucket/ds=2008-04-09/hr=12 [srcpartbucket] Reducer 2 Execution mode: vectorized Needs Tagging: false