diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index aba2946f7a..15e0f8f549 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -540,6 +540,7 @@ minillaplocal.query.files=\ dynpart_sort_opt_vectorization.q,\ dynpart_sort_optimization.q,\ dynpart_sort_optimization_acid.q,\ + dynpart_sort_opt_bucketing.q,\ enforce_constraint_notnull.q,\ escape1.q,\ escape2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 9ec0d73916..6e9dda9a39 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -77,6 +77,7 @@ import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.orc.OrcConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -288,16 +289,29 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List descs = new ArrayList(allRSCols.size()); List colNames = new ArrayList(); String colName; + final List fileSinkSchema = fsOp.getSchema().getSignature(); for (int i = 0; i < allRSCols.size(); i++) { ExprNodeDesc col = allRSCols.get(i); + ExprNodeDesc newColumnExpr = null; colName = col.getExprString(); colNames.add(colName); if (partitionPositions.contains(i) || sortPositions.contains(i)) { - descs.add(new ExprNodeColumnDesc(col.getTypeInfo(), ReduceField.KEY.toString()+"."+colName, null, false)); + newColumnExpr = (new ExprNodeColumnDesc(col.getTypeInfo(), ReduceField.KEY.toString()+"."+colName, null, false)); } else { - descs.add(new ExprNodeColumnDesc(col.getTypeInfo(), ReduceField.VALUE.toString()+"."+colName, null, false)); + newColumnExpr = (new ExprNodeColumnDesc(col.getTypeInfo(), ReduceField.VALUE.toString()+"."+colName, null, false)); } + + // make sure column type matches with expected types in FS op + if(i < fileSinkSchema.size()) { + final ColumnInfo fsColInfo = fileSinkSchema.get(i); + if (!newColumnExpr.getTypeInfo().equals(fsColInfo.getType())) { + newColumnExpr = ParseUtils + .createConversionCast(newColumnExpr, (PrimitiveTypeInfo) fsColInfo.getType()); + } + } + descs.add(newColumnExpr); } + RowSchema selRS = new RowSchema(fsParent.getSchema()); if (bucketColumns!= null && !bucketColumns.isEmpty()) { descs.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, @@ -407,14 +421,7 @@ private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) { rsChild.getSchema().getSignature().size()) { return false; } - // if child is select and contains expression which isn't column it shouldn't - // be removed because otherwise we will end up with different types/schema later - // while introducing select for RS - for(ExprNodeDesc expr: rsChild.getColumnExprMap().values()){ - if(!(expr instanceof ExprNodeColumnDesc)){ - return false; - } - } + rsParent.getChildOperators().remove(rsToRemove); rsParent.getChildOperators().add(rsGrandChild); rsGrandChild.getParentOperators().clear(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index f5df9cd302..51d824f071 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -6830,7 +6830,7 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, if (updating(dest) || deleting(dest)) { partnCols = getPartitionColsFromBucketColsForUpdateDelete(input, true); } else { - partnCols = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, true); + partnCols = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, false); } } else { if(updating(dest) || deleting(dest)) { diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q b/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q index 7cb90d3f2d..5430ebdb48 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q @@ -74,6 +74,7 @@ dfs -cat ${hiveconf:hive.metastore.warehouse.dir}/t1_n147/e=epart/000008_0; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.stats.autogather=false; +set hive.optimize.sort.dynamic.partition.threshold=1; CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, @@ -88,6 +89,58 @@ explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', '-5', 'single family'); +select * from dynpart_sort_opt_bucketing_test; + +-- with auto stats +set hive.stats.autogather=true; +explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family'); +INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family'); +select * from dynpart_sort_opt_bucketing_test; + DROP TABLE dynpart_sort_opt_bucketing_test; +-- test case to test that CAST on bucketing column doesn't prevent sort dynamic partition + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +drop table if exists t1_staging; +create table t1_staging( + a string, + b int, + c int, + d string) + partitioned by (e decimal(18,0)) + clustered by(a) + into 256 buckets STORED AS TEXTFILE; +load data local inpath '../../data/files/sortdp/000000_0' overwrite into table t1_staging partition (e=100); + +drop table t1_n147; +create table t1_n147( + a string, + b decimal(6,0), + c int, + d string) + partitioned by (e decimal(3,0)) + clustered by(a,b) + into 10 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +set hive.stats.autogather=false; +set hive.optimize.bucketingsorting = true; +explain insert overwrite table t1_n147 partition(e) select a,b,c,d,e from t1_staging; +insert overwrite table t1_n147 partition(e) select a,b,c,d,e from t1_staging; + +with q1 as (select count(*) as cnt from t1_staging), + q2 as (select count(*) as cnt from t1_n147) +select q1.cnt = q2.cnt from q1 join q2; + +drop table t1_staging; +drop table t1_n147; + + + diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out deleted file mode 100644 index 390230e855..0000000000 --- a/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out +++ /dev/null @@ -1,356 +0,0 @@ -PREHOOK: query: drop table if exists t1_staging -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists t1_staging -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table t1_staging( -a string, -b int, -c int, -d string) -partitioned by (e string) -clustered by(a) -sorted by(a desc) -into 256 buckets stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1_staging -POSTHOOK: query: create table t1_staging( -a string, -b int, -c int, -d string) -partitioned by (e string) -clustered by(a) -sorted by(a desc) -into 256 buckets stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1_staging -PREHOOK: query: load data local inpath '../../data/files/sortdp/000000_0' overwrite into table t1_staging partition (e='epart') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t1_staging -POSTHOOK: query: load data local inpath '../../data/files/sortdp/000000_0' overwrite into table t1_staging partition (e='epart') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t1_staging -POSTHOOK: Output: default@t1_staging@e=epart -PREHOOK: query: drop table t1_n147 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table t1_n147 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table t1_n147( -a string, -b int, -c int, -d string) -partitioned by (e string) -clustered by(a) -sorted by(a desc) into 10 buckets stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1_n147 -POSTHOOK: query: create table t1_n147( -a string, -b int, -c int, -d string) -partitioned by (e string) -clustered by(a) -sorted by(a desc) into 10 buckets stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1_n147 -PREHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,'epart' from t1_staging -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_staging -PREHOOK: Input: default@t1_staging@e=epart -PREHOOK: Output: default@t1_n147 -POSTHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,'epart' from t1_staging -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_staging -POSTHOOK: Input: default@t1_staging@e=epart -POSTHOOK: Output: default@t1_n147@e=epart -POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).a SIMPLE [(t1_staging)t1_staging.FieldSchema(name:a, type:string, comment:null), ] -POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).b SIMPLE [(t1_staging)t1_staging.FieldSchema(name:b, type:int, comment:null), ] -POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).c SIMPLE [(t1_staging)t1_staging.FieldSchema(name:c, type:int, comment:null), ] -POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).d SIMPLE [(t1_staging)t1_staging.FieldSchema(name:d, type:string, comment:null), ] -PREHOOK: query: select 'bucket_0' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_0' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_0 -fff06c6e0fd675ebeff09350e6b7a3900115f72341fd353e5e185e8983d10534002015-01-21 -PREHOOK: query: select 'bucket_2' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_2' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_2 -ffff67aef705abda0b89d899e408c28ef230fd0bb2cb0bb23b057e946ba9ca91\N\N2015-01-21 -fff3474e56ee23c0df629b538268a438d74da36208bdb114bda2da4253f0b4c9\N\N2015-01-21 -PREHOOK: query: select 'bucket_4' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_4' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_4 -fffcb494aa56beb88fddb83cc3b0296d417d3ab7782be76c8c12d33e3f3d6a3c\N\N2015-01-21 -fffad1074d813e6db5c23302a9170fe472c2968844499c90445cbc8559d64fe1082015-01-21 -fff03007f38c32085bb4c9389270b965d371168032845555b663b7d4653ec8ee\N\N2015-01-21 -ffefa017a261a0b3e94c2386e0c47a015e2095e5d56b0f4fca2033d9755c9e45012015-01-21 -PREHOOK: query: select 'bucket_6' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_6' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_6 -fffee943d640a7714d09f9bd50dba08a9d0ebdd146655e4642c293a4396cb385\N\N2015-01-21 -fffc0450ec9b28bae495dffc87a37cc3eea6d2d067ccea8b333185d28847ae2a\N\N2015-01-21 -PREHOOK: query: select 'bucket_8' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_8' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_8 -fffcf74695669d1f444936243869f3586418c6d61d3abb5cc9acb67a3ad7bd2f002015-01-21 -fffbe3c110c390ec20218e5ad4a026ff515668ed55488b717319b556daa962a1002015-01-21 -fffb1b226efc3cfaac8d73647ce4fa4e82413d67265fb55366ac3a4996518738012015-01-21 -fff56191e39b15f0e2f04984c70152fb1bde2ecba52ff5a73b4c28bf4d58c017002015-01-21 -fff4166378aa9d94cd4f8a9cd543375890a61b4f09a57dbfb31a66b33b3e3fd9\N\N2015-01-21 -PREHOOK: query: drop table t1_n147 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@t1_n147 -PREHOOK: Output: default@t1_n147 -POSTHOOK: query: drop table t1_n147 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@t1_n147 -POSTHOOK: Output: default@t1_n147 -PREHOOK: query: create table t1_n147( -a string, -b int, -c int, -d string) -partitioned by (e string) -clustered by(a) -sorted by(a desc) into 10 buckets stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1_n147 -POSTHOOK: query: create table t1_n147( -a string, -b int, -c int, -d string) -partitioned by (e string) -clustered by(a) -sorted by(a desc) into 10 buckets stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1_n147 -PREHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,'epart' from t1_staging -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_staging -PREHOOK: Input: default@t1_staging@e=epart -PREHOOK: Output: default@t1_n147 -POSTHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,'epart' from t1_staging -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_staging -POSTHOOK: Input: default@t1_staging@e=epart -POSTHOOK: Output: default@t1_n147@e=epart -POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).a SIMPLE [(t1_staging)t1_staging.FieldSchema(name:a, type:string, comment:null), ] -POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).b SIMPLE [(t1_staging)t1_staging.FieldSchema(name:b, type:int, comment:null), ] -POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).c SIMPLE [(t1_staging)t1_staging.FieldSchema(name:c, type:int, comment:null), ] -POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).d SIMPLE [(t1_staging)t1_staging.FieldSchema(name:d, type:string, comment:null), ] -PREHOOK: query: select 'bucket_0' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_0' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_0 -fff06c6e0fd675ebeff09350e6b7a3900115f72341fd353e5e185e8983d10534002015-01-21 -PREHOOK: query: select 'bucket_2' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_2' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_2 -ffff67aef705abda0b89d899e408c28ef230fd0bb2cb0bb23b057e946ba9ca91\N\N2015-01-21 -fff3474e56ee23c0df629b538268a438d74da36208bdb114bda2da4253f0b4c9\N\N2015-01-21 -PREHOOK: query: select 'bucket_4' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_4' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_4 -fffcb494aa56beb88fddb83cc3b0296d417d3ab7782be76c8c12d33e3f3d6a3c\N\N2015-01-21 -fffad1074d813e6db5c23302a9170fe472c2968844499c90445cbc8559d64fe1082015-01-21 -fff03007f38c32085bb4c9389270b965d371168032845555b663b7d4653ec8ee\N\N2015-01-21 -ffefa017a261a0b3e94c2386e0c47a015e2095e5d56b0f4fca2033d9755c9e45012015-01-21 -PREHOOK: query: select 'bucket_6' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_6' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_6 -fffee943d640a7714d09f9bd50dba08a9d0ebdd146655e4642c293a4396cb385\N\N2015-01-21 -fffc0450ec9b28bae495dffc87a37cc3eea6d2d067ccea8b333185d28847ae2a\N\N2015-01-21 -PREHOOK: query: select 'bucket_8' -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -POSTHOOK: query: select 'bucket_8' -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -#### A masked pattern was here #### -bucket_8 -fffcf74695669d1f444936243869f3586418c6d61d3abb5cc9acb67a3ad7bd2f002015-01-21 -fffbe3c110c390ec20218e5ad4a026ff515668ed55488b717319b556daa962a1002015-01-21 -fffb1b226efc3cfaac8d73647ce4fa4e82413d67265fb55366ac3a4996518738012015-01-21 -fff56191e39b15f0e2f04984c70152fb1bde2ecba52ff5a73b4c28bf4d58c017002015-01-21 -fff4166378aa9d94cd4f8a9cd543375890a61b4f09a57dbfb31a66b33b3e3fd9\N\N2015-01-21 -PREHOOK: query: CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, - ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, - ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) - PARTITIONED BY (ca_location_type string) - CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dynpart_sort_opt_bucketing_test -POSTHOOK: query: CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, - ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, - ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) - PARTITIONED BY (ca_location_type string) - CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test -PREHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', - 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', - '-5', 'single family') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@dynpart_sort_opt_bucketing_test -POSTHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', - 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', - '-5', 'single family') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(const struct(5555,'AAAAAAAADLFBAAAA','126','Highland Park','Court','Suite E','San Jose','King George County','VA','28003','United States','-5','single family')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Select Operator - expressions: col1 (type: int), col2 (type: string), col3 (type: string), col4 (type: string), col5 (type: string), col6 (type: string), col7 (type: string), col8 (type: string), col9 (type: string), col10 (type: string), col11 (type: string), col12 (type: string), col13 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col8 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col8 (type: string) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col8 (type: string), VALUE._col9 (type: string), CAST( VALUE._col10 AS decimal(5,2)) (type: decimal(5,2)), VALUE._col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.dynpart_sort_opt_bucketing_test - Write Type: INSERT - - Stage: Stage-0 - Move Operator - tables: - partition: - ca_location_type - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.dynpart_sort_opt_bucketing_test - Write Type: INSERT - -PREHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', - 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', - '-5', 'single family') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@dynpart_sort_opt_bucketing_test -POSTHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', - 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', - '-5', 'single family') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test@ca_location_type=single family -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_address_id SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_address_sk SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_city SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_country SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_county SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_gmt_offset SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_state SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_name SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_number SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_type SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_suite_number SCRIPT [] -POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_zip SCRIPT [] -PREHOOK: query: DROP TABLE dynpart_sort_opt_bucketing_test -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@dynpart_sort_opt_bucketing_test -PREHOOK: Output: default@dynpart_sort_opt_bucketing_test -POSTHOOK: query: DROP TABLE dynpart_sort_opt_bucketing_test -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@dynpart_sort_opt_bucketing_test -POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_bucketing.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_bucketing.q.out new file mode 100644 index 0000000000..ac85f2cd84 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_bucketing.q.out @@ -0,0 +1,701 @@ +PREHOOK: query: drop table if exists t1_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists t1_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table t1_staging( +a string, +b int, +c int, +d string) +partitioned by (e string) +clustered by(a) +sorted by(a desc) +into 256 buckets stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_staging +POSTHOOK: query: create table t1_staging( +a string, +b int, +c int, +d string) +partitioned by (e string) +clustered by(a) +sorted by(a desc) +into 256 buckets stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_staging +PREHOOK: query: load data local inpath '../../data/files/sortdp/000000_0' overwrite into table t1_staging partition (e='epart') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_staging +POSTHOOK: query: load data local inpath '../../data/files/sortdp/000000_0' overwrite into table t1_staging partition (e='epart') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_staging +POSTHOOK: Output: default@t1_staging@e=epart +PREHOOK: query: drop table t1_n147 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table t1_n147 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table t1_n147( +a string, +b int, +c int, +d string) +partitioned by (e string) +clustered by(a) +sorted by(a desc) into 10 buckets stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: create table t1_n147( +a string, +b int, +c int, +d string) +partitioned by (e string) +clustered by(a) +sorted by(a desc) into 10 buckets stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_n147 +PREHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,'epart' from t1_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_staging +PREHOOK: Input: default@t1_staging@e=epart +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,'epart' from t1_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_staging +POSTHOOK: Input: default@t1_staging@e=epart +POSTHOOK: Output: default@t1_n147@e=epart +POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).a SIMPLE [(t1_staging)t1_staging.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).b SIMPLE [(t1_staging)t1_staging.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).c SIMPLE [(t1_staging)t1_staging.FieldSchema(name:c, type:int, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).d SIMPLE [(t1_staging)t1_staging.FieldSchema(name:d, type:string, comment:null), ] +PREHOOK: query: select 'bucket_0' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_0' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_0 +fff06c6e0fd675ebeff09350e6b7a3900115f72341fd353e5e185e8983d10534002015-01-21 +PREHOOK: query: select 'bucket_2' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_2' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_2 +ffff67aef705abda0b89d899e408c28ef230fd0bb2cb0bb23b057e946ba9ca91\N\N2015-01-21 +fff3474e56ee23c0df629b538268a438d74da36208bdb114bda2da4253f0b4c9\N\N2015-01-21 +PREHOOK: query: select 'bucket_4' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_4' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_4 +fffcb494aa56beb88fddb83cc3b0296d417d3ab7782be76c8c12d33e3f3d6a3c\N\N2015-01-21 +fffad1074d813e6db5c23302a9170fe472c2968844499c90445cbc8559d64fe1082015-01-21 +fff03007f38c32085bb4c9389270b965d371168032845555b663b7d4653ec8ee\N\N2015-01-21 +ffefa017a261a0b3e94c2386e0c47a015e2095e5d56b0f4fca2033d9755c9e45012015-01-21 +PREHOOK: query: select 'bucket_6' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_6' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_6 +fffee943d640a7714d09f9bd50dba08a9d0ebdd146655e4642c293a4396cb385\N\N2015-01-21 +fffc0450ec9b28bae495dffc87a37cc3eea6d2d067ccea8b333185d28847ae2a\N\N2015-01-21 +PREHOOK: query: select 'bucket_8' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_8' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_8 +fffcf74695669d1f444936243869f3586418c6d61d3abb5cc9acb67a3ad7bd2f002015-01-21 +fffbe3c110c390ec20218e5ad4a026ff515668ed55488b717319b556daa962a1002015-01-21 +fffb1b226efc3cfaac8d73647ce4fa4e82413d67265fb55366ac3a4996518738012015-01-21 +fff56191e39b15f0e2f04984c70152fb1bde2ecba52ff5a73b4c28bf4d58c017002015-01-21 +fff4166378aa9d94cd4f8a9cd543375890a61b4f09a57dbfb31a66b33b3e3fd9\N\N2015-01-21 +PREHOOK: query: drop table t1_n147 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1_n147 +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: drop table t1_n147 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1_n147 +POSTHOOK: Output: default@t1_n147 +PREHOOK: query: create table t1_n147( +a string, +b int, +c int, +d string) +partitioned by (e string) +clustered by(a) +sorted by(a desc) into 10 buckets stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: create table t1_n147( +a string, +b int, +c int, +d string) +partitioned by (e string) +clustered by(a) +sorted by(a desc) into 10 buckets stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_n147 +PREHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,'epart' from t1_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_staging +PREHOOK: Input: default@t1_staging@e=epart +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,'epart' from t1_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_staging +POSTHOOK: Input: default@t1_staging@e=epart +POSTHOOK: Output: default@t1_n147@e=epart +POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).a SIMPLE [(t1_staging)t1_staging.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).b SIMPLE [(t1_staging)t1_staging.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).c SIMPLE [(t1_staging)t1_staging.FieldSchema(name:c, type:int, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=epart).d SIMPLE [(t1_staging)t1_staging.FieldSchema(name:d, type:string, comment:null), ] +PREHOOK: query: select 'bucket_0' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_0' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_0 +fff06c6e0fd675ebeff09350e6b7a3900115f72341fd353e5e185e8983d10534002015-01-21 +PREHOOK: query: select 'bucket_2' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_2' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_2 +ffff67aef705abda0b89d899e408c28ef230fd0bb2cb0bb23b057e946ba9ca91\N\N2015-01-21 +fff3474e56ee23c0df629b538268a438d74da36208bdb114bda2da4253f0b4c9\N\N2015-01-21 +PREHOOK: query: select 'bucket_4' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_4' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_4 +fffcb494aa56beb88fddb83cc3b0296d417d3ab7782be76c8c12d33e3f3d6a3c\N\N2015-01-21 +fffad1074d813e6db5c23302a9170fe472c2968844499c90445cbc8559d64fe1082015-01-21 +fff03007f38c32085bb4c9389270b965d371168032845555b663b7d4653ec8ee\N\N2015-01-21 +ffefa017a261a0b3e94c2386e0c47a015e2095e5d56b0f4fca2033d9755c9e45012015-01-21 +PREHOOK: query: select 'bucket_6' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_6' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_6 +fffee943d640a7714d09f9bd50dba08a9d0ebdd146655e4642c293a4396cb385\N\N2015-01-21 +fffc0450ec9b28bae495dffc87a37cc3eea6d2d067ccea8b333185d28847ae2a\N\N2015-01-21 +PREHOOK: query: select 'bucket_8' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'bucket_8' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +bucket_8 +fffcf74695669d1f444936243869f3586418c6d61d3abb5cc9acb67a3ad7bd2f002015-01-21 +fffbe3c110c390ec20218e5ad4a026ff515668ed55488b717319b556daa962a1002015-01-21 +fffb1b226efc3cfaac8d73647ce4fa4e82413d67265fb55366ac3a4996518738012015-01-21 +fff56191e39b15f0e2f04984c70152fb1bde2ecba52ff5a73b4c28bf4d58c017002015-01-21 +fff4166378aa9d94cd4f8a9cd543375890a61b4f09a57dbfb31a66b33b3e3fd9\N\N2015-01-21 +PREHOOK: query: CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, + ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, + ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) + PARTITIONED BY (ca_location_type string) + CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, + ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, + ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) + PARTITIONED BY (ca_location_type string) + CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test +PREHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(5555,'AAAAAAAADLFBAAAA','126','Highland Park','Court','Suite E','San Jose','King George County','VA','28003','United States','-5','single family')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string), col3 (type: string), col4 (type: string), col5 (type: string), col6 (type: string), col7 (type: string), col8 (type: string), col9 (type: string), col10 (type: string), col11 (type: string), col12 (type: string), col13 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col12 (type: string), _bucket_number (type: string), _col8 (type: string) + null sort order: aaa + sort order: +++ + Map-reduce partition columns: _col12 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: string), KEY._col8 (type: string), VALUE._col9 (type: string), VALUE._col10 (type: string), CAST( VALUE._col11 AS decimal(5,2)) (type: decimal(5,2)), KEY._col12 (type: string), KEY._bucket_number (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _bucket_number + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dynpart_sort_opt_bucketing_test + Write Type: INSERT + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ca_location_type + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dynpart_sort_opt_bucketing_test + Write Type: INSERT + +PREHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test@ca_location_type=single family +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_address_id SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_address_sk SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_city SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_country SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_county SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_gmt_offset SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_state SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_name SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_number SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_type SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_suite_number SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_zip SCRIPT [] +PREHOOK: query: select * from dynpart_sort_opt_bucketing_test +PREHOOK: type: QUERY +PREHOOK: Input: default@dynpart_sort_opt_bucketing_test +PREHOOK: Input: default@dynpart_sort_opt_bucketing_test@ca_location_type=single family +#### A masked pattern was here #### +POSTHOOK: query: select * from dynpart_sort_opt_bucketing_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dynpart_sort_opt_bucketing_test +POSTHOOK: Input: default@dynpart_sort_opt_bucketing_test@ca_location_type=single family +#### A masked pattern was here #### +5555 AAAAAAAADLFBAAAA 126 Highland Park Court Suite E San Jose King George County VA 28003 United States -5.00 single family +PREHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(5555,'AAAAAAAADLFBAAAA','126','Highland Park','Court','Suite E','San Jose','King George County','VA','28003','United States','-5','single family')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string), col3 (type: string), col4 (type: string), col5 (type: string), col6 (type: string), col7 (type: string), col8 (type: string), col9 (type: string), col10 (type: string), col11 (type: string), col12 (type: string), col13 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col12 (type: string), _bucket_number (type: string), _col8 (type: string) + null sort order: aaa + sort order: +++ + Map-reduce partition columns: _col12 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: string), KEY._col8 (type: string), VALUE._col9 (type: string), VALUE._col10 (type: string), CAST( VALUE._col11 AS decimal(5,2)) (type: decimal(5,2)), KEY._col12 (type: string), KEY._bucket_number (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _bucket_number + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dynpart_sort_opt_bucketing_test + Write Type: INSERT + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ca_location_type + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dynpart_sort_opt_bucketing_test + Write Type: INSERT + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset + Column Types: int, string, string, string, string, string, string, string, string, string, string, decimal(5,2) + Table: default.dynpart_sort_opt_bucketing_test + +PREHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test@ca_location_type=single family +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_address_id SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_address_sk SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_city SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_country SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_county SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_gmt_offset SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_state SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_name SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_number SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_type SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_suite_number SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_zip SCRIPT [] +PREHOOK: query: select * from dynpart_sort_opt_bucketing_test +PREHOOK: type: QUERY +PREHOOK: Input: default@dynpart_sort_opt_bucketing_test +PREHOOK: Input: default@dynpart_sort_opt_bucketing_test@ca_location_type=single family +#### A masked pattern was here #### +POSTHOOK: query: select * from dynpart_sort_opt_bucketing_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dynpart_sort_opt_bucketing_test +POSTHOOK: Input: default@dynpart_sort_opt_bucketing_test@ca_location_type=single family +#### A masked pattern was here #### +5555 AAAAAAAADLFBAAAA 126 Highland Park Court Suite E San Jose King George County VA 28003 United States -5.00 single family +5555 AAAAAAAADLFBAAAA 126 Highland Park Court Suite E San Jose King George County VA 28003 United States -5.00 single family +PREHOOK: query: DROP TABLE dynpart_sort_opt_bucketing_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dynpart_sort_opt_bucketing_test +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: DROP TABLE dynpart_sort_opt_bucketing_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dynpart_sort_opt_bucketing_test +POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test +PREHOOK: query: drop table if exists t1_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1_staging +PREHOOK: Output: default@t1_staging +POSTHOOK: query: drop table if exists t1_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1_staging +POSTHOOK: Output: default@t1_staging +PREHOOK: query: create table t1_staging( + a string, + b int, + c int, + d string) + partitioned by (e decimal(18,0)) + clustered by(a) + into 256 buckets STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_staging +POSTHOOK: query: create table t1_staging( + a string, + b int, + c int, + d string) + partitioned by (e decimal(18,0)) + clustered by(a) + into 256 buckets STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_staging +PREHOOK: query: load data local inpath '../../data/files/sortdp/000000_0' overwrite into table t1_staging partition (e=100) +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_staging +POSTHOOK: query: load data local inpath '../../data/files/sortdp/000000_0' overwrite into table t1_staging partition (e=100) +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_staging +POSTHOOK: Output: default@t1_staging@e=100 +PREHOOK: query: drop table t1_n147 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1_n147 +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: drop table t1_n147 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1_n147 +POSTHOOK: Output: default@t1_n147 +PREHOOK: query: create table t1_n147( + a string, + b decimal(6,0), + c int, + d string) + partitioned by (e decimal(3,0)) + clustered by(a,b) + into 10 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: create table t1_n147( + a string, + b decimal(6,0), + c int, + d string) + partitioned by (e decimal(3,0)) + clustered by(a,b) + into 10 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_n147 +PREHOOK: query: explain insert overwrite table t1_n147 partition(e) select a,b,c,d,e from t1_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_staging +PREHOOK: Input: default@t1_staging@e=100 +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: explain insert overwrite table t1_n147 partition(e) select a,b,c,d,e from t1_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_staging +POSTHOOK: Input: default@t1_staging@e=100 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_staging + Statistics: Num rows: 51 Data size: 23008 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: a (type: string), b (type: int), c (type: int), d (type: string), e (type: decimal(18,0)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 51 Data size: 23008 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + key expressions: _col4 (type: decimal(18,0)), _bucket_number (type: string), _col0 (type: string), _col1 (type: int) + null sort order: aaaa + sort order: ++++ + Map-reduce partition columns: _col4 (type: decimal(18,0)) + Statistics: Num rows: 51 Data size: 23008 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col2 (type: int), _col3 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY._col0 (type: string), CAST( KEY._col1 AS decimal(6,0)) (type: decimal(6,0)), VALUE._col2 (type: int), VALUE._col3 (type: string), KEY._col4 (type: decimal(18,0)), KEY._bucket_number (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 51 Data size: 23008 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t1_n147 + Write Type: INSERT + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + e + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t1_n147 + Write Type: INSERT + +PREHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,e from t1_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_staging +PREHOOK: Input: default@t1_staging@e=100 +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: insert overwrite table t1_n147 partition(e) select a,b,c,d,e from t1_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_staging +POSTHOOK: Input: default@t1_staging@e=100 +POSTHOOK: Output: default@t1_n147@e=100 +POSTHOOK: Lineage: t1_n147 PARTITION(e=100).a SIMPLE [(t1_staging)t1_staging.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=100).b EXPRESSION [(t1_staging)t1_staging.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=100).c SIMPLE [(t1_staging)t1_staging.FieldSchema(name:c, type:int, comment:null), ] +POSTHOOK: Lineage: t1_n147 PARTITION(e=100).d SIMPLE [(t1_staging)t1_staging.FieldSchema(name:d, type:string, comment:null), ] +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: with q1 as (select count(*) as cnt from t1_staging), + q2 as (select count(*) as cnt from t1_n147) +select q1.cnt = q2.cnt from q1 join q2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n147 +PREHOOK: Input: default@t1_n147@e=100 +PREHOOK: Input: default@t1_staging +PREHOOK: Input: default@t1_staging@e=100 +#### A masked pattern was here #### +POSTHOOK: query: with q1 as (select count(*) as cnt from t1_staging), + q2 as (select count(*) as cnt from t1_n147) +select q1.cnt = q2.cnt from q1 join q2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n147 +POSTHOOK: Input: default@t1_n147@e=100 +POSTHOOK: Input: default@t1_staging +POSTHOOK: Input: default@t1_staging@e=100 +#### A masked pattern was here #### +true +PREHOOK: query: drop table t1_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1_staging +PREHOOK: Output: default@t1_staging +POSTHOOK: query: drop table t1_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1_staging +POSTHOOK: Output: default@t1_staging +PREHOOK: query: drop table t1_n147 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1_n147 +PREHOOK: Output: default@t1_n147 +POSTHOOK: query: drop table t1_n147 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1_n147 +POSTHOOK: Output: default@t1_n147