diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 8061c1a45d..b8e97bdaa1 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -553,6 +553,7 @@ minillaplocal.query.files=\
   insert_dir_distcp.q,\
   insert_into_default_keyword.q,\
   insert_into_with_schema.q,\
+  insert_only_empty_query.q,\
   insert_overwrite.q,\
   insert_values_orig_table.q,\
   insert_values_orig_table_use_metadata.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index ac89dd9efe..d59ca8c564 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -4231,8 +4231,8 @@ public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Con
     Utilities.FILE_OP_LOGGER.debug("Looking for files in: {}", specPath);
     AcidUtils.IdPathFilter filter = new AcidUtils.IdPathFilter(writeId, stmtId);

-    if (isMmCtas && !fs.exists(specPath)) {
-      Utilities.FILE_OP_LOGGER.info("Creating table directory for CTAS with no output at {}", specPath);
+    if (!fs.exists(specPath)) {
+      Utilities.FILE_OP_LOGGER.info("Creating directory with no output at {}", specPath);
       FileUtils.mkdir(fs, specPath, hconf);
     }
     Path[] files = getMmDirectoryCandidates(
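Note on the Utilities.java hunk above: the directory-creation guard, previously limited to an MM CTAS with no output, now fires whenever the final path is missing, so an insert-only (micromanaged) INSERT that produces zero rows still gets its directory created before getMmDirectoryCandidates lists it. A minimal standalone sketch of the new behavior, assuming only plain Hadoop APIs: ensureFinalPathExists is a hypothetical helper, and fs.mkdirs stands in for Hive's FileUtils.mkdir(fs, specPath, hconf).

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EnsureFinalPathExists {
  // After the patch: create the final directory for any missing spec path,
  // not only for an MM CTAS, so the subsequent listing cannot hit a
  // non-existent directory when the query wrote no rows.
  static void ensureFinalPathExists(FileSystem fs, Path specPath) throws IOException {
    if (!fs.exists(specPath)) {
      fs.mkdirs(specPath); // stand-in for Hive's FileUtils.mkdir(fs, specPath, hconf)
    }
  }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Illustrative delta-style path on the local filesystem.
    Path specPath = new Path("/tmp/mm_final_path_demo/delta_0000001_0000001_0000");
    FileSystem fs = specPath.getFileSystem(conf);
    ensureFinalPathExists(fs, specPath);
    System.out.println("exists after ensure: " + fs.exists(specPath));
  }
}

Only the widened guard, the log message, and the FileUtils.mkdir call in the hunk are the real implementation; everything else here is scaffolding for illustration.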
diff --git a/ql/src/test/queries/clientpositive/insert_only_empty_query.q b/ql/src/test/queries/clientpositive/insert_only_empty_query.q
new file mode 100644
index 0000000000..5e731ebe57
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/insert_only_empty_query.q
@@ -0,0 +1,24 @@
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+create table src_emptybucket_partitioned_1 (name string, age int, gpa decimal(3,2))
+ partitioned by(year int)
+ clustered by (age)
+ sorted by (age)
+ into 100 buckets
+ stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only");
+
+create table source_table(name string, age int, gpa decimal(3,2));
+insert into source_table values("name", 56, 4);
+
+explain insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 0;
+insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 0;
+
+insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 1;
+select * from src_emptybucket_partitioned_1;
+
+drop table src_emptybucket_partitioned_1;
+drop table source_table;
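The LIMIT 0 statements in the test above are the interesting case: with no rows selected, no writer task creates the partition's directory, and before this patch the finalize step then listed a path that did not exist. A self-contained sketch of that failure mode against the local filesystem; the path is illustrative, and FileSystem.listStatus is documented to throw FileNotFoundException for a missing path.

import java.io.FileNotFoundException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListMissingFinalPathDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Illustrative final path for an insert-only partition that no task created.
    Path specPath = new Path("/tmp/list_missing_final_path_demo/year=2015");
    FileSystem fs = specPath.getFileSystem(conf);
    fs.delete(specPath, true); // ensure the path is absent
    try {
      fs.listStatus(specPath); // pre-patch code path: listing a missing directory fails
    } catch (FileNotFoundException expected) {
      System.out.println("listing a missing path fails: " + expected.getMessage());
    }
    fs.mkdirs(specPath); // post-patch code path: create first, then list
    System.out.println("entries after mkdirs: " + fs.listStatus(specPath).length);
  }
}

With the generalized guard in handleMmTableFinalPath, the empty insert instead leaves a valid, empty partition directory, which is what the q.out below records succeeding.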
diff --git a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out
new file mode 100644
index 0000000000..bfa72c59a3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out
@@ -0,0 +1,222 @@
+PREHOOK: query: create table src_emptybucket_partitioned_1 (name string, age int, gpa decimal(3,2))
+ partitioned by(year int)
+ clustered by (age)
+ sorted by (age)
+ into 100 buckets
+ stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_emptybucket_partitioned_1
+POSTHOOK: query: create table src_emptybucket_partitioned_1 (name string, age int, gpa decimal(3,2))
+ partitioned by(year int)
+ clustered by (age)
+ sorted by (age)
+ into 100 buckets
+ stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_emptybucket_partitioned_1
+PREHOOK: query: create table source_table(name string, age int, gpa decimal(3,2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source_table
+POSTHOOK: query: create table source_table(name string, age int, gpa decimal(3,2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source_table
+PREHOOK: query: insert into source_table values("name", 56, 4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@source_table
+POSTHOOK: query: insert into source_table values("name", 56, 4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@source_table
+POSTHOOK: Lineage: source_table.age SCRIPT []
+POSTHOOK: Lineage: source_table.gpa SCRIPT []
+POSTHOOK: Lineage: source_table.name SCRIPT []
+PREHOOK: query: explain insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Output: default@src_emptybucket_partitioned_1@year=2015
+POSTHOOK: query: explain insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Output: default@src_emptybucket_partitioned_1@year=2015
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: source_table
+                  Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: name (type: string), age (type: int), gpa (type: decimal(3,2))
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                    Limit
+                      Number of rows: 0
+                      Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: decimal(3,2))
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(3,2))
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 0
+                  Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col1 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col1 (type: int)
+                    Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: string), _col2 (type: decimal(3,2))
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: decimal(3,2))
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.src_emptybucket_partitioned_1
+                  Write Type: INSERT
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: int), _col2 (type: decimal(3,2)), UDFToInteger('2015') (type: int)
+                  outputColumnNames: name, age, gpa, year
+                  Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(name, 'hll'), compute_stats(age, 'hll'), compute_stats(gpa, 'hll')
+                    keys: year (type: int)
+                    minReductionHashAggr: 0.0
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            year 2015
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.src_emptybucket_partitioned_1
+          micromanaged table: true
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: name, age, gpa
+          Column Types: string, int, decimal(3,2)
+          Table: default.src_emptybucket_partitioned_1
+
+PREHOOK: query: insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Output: default@src_emptybucket_partitioned_1@year=2015
+POSTHOOK: query: insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Output: default@src_emptybucket_partitioned_1@year=2015
+POSTHOOK: Lineage: src_emptybucket_partitioned_1 PARTITION(year=2015).age SIMPLE [(source_table)source_table.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: src_emptybucket_partitioned_1 PARTITION(year=2015).gpa SIMPLE [(source_table)source_table.FieldSchema(name:gpa, type:decimal(3,2), comment:null), ]
+POSTHOOK: Lineage: src_emptybucket_partitioned_1 PARTITION(year=2015).name SIMPLE [(source_table)source_table.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Output: default@src_emptybucket_partitioned_1@year=2015
+POSTHOOK: query: insert into table src_emptybucket_partitioned_1 partition(year=2015) select * from source_table limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Output: default@src_emptybucket_partitioned_1@year=2015
+POSTHOOK: Lineage: src_emptybucket_partitioned_1 PARTITION(year=2015).age SIMPLE [(source_table)source_table.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: src_emptybucket_partitioned_1 PARTITION(year=2015).gpa SIMPLE [(source_table)source_table.FieldSchema(name:gpa, type:decimal(3,2), comment:null), ]
+POSTHOOK: Lineage: src_emptybucket_partitioned_1 PARTITION(year=2015).name SIMPLE [(source_table)source_table.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: select * from src_emptybucket_partitioned_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_emptybucket_partitioned_1
+PREHOOK: Input: default@src_emptybucket_partitioned_1@year=2015
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_emptybucket_partitioned_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_emptybucket_partitioned_1
+POSTHOOK: Input: default@src_emptybucket_partitioned_1@year=2015
+#### A masked pattern was here ####
+name	56	4.00	2015
+PREHOOK: query: drop table src_emptybucket_partitioned_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_emptybucket_partitioned_1
+PREHOOK: Output: default@src_emptybucket_partitioned_1
+POSTHOOK: query: drop table src_emptybucket_partitioned_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_emptybucket_partitioned_1
+POSTHOOK: Output: default@src_emptybucket_partitioned_1
+PREHOOK: query: drop table source_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@source_table
+PREHOOK: Output: default@source_table
+POSTHOOK: query: drop table source_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@source_table
+POSTHOOK: Output: default@source_table