diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 77388dd..e35c299 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -5662,14 +5662,13 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) colInfo.setAlias(nm[1]); } + String colName = colInfo.getInternalName(); //default column name if (field_schemas != null) { FieldSchema col = new FieldSchema(); - if ("".equals(nm[0]) || nm[1] == null) { - // ast expression is not a valid column name for table - col.setName(colInfo.getInternalName()); - } else { - col.setName(unescapeIdentifier(colInfo.getAlias()).toLowerCase()); // remove `` + if (!"".equals(nm[0]) && nm[1] != null) { + colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase(); // remove `` + } + col.setName(colName); col.setType(colInfo.getType().getTypeName()); field_schemas.add(col); } @@ -5680,7 +5679,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) } first = false; - cols = cols.concat(colInfo.getInternalName()); + cols = cols.concat(colName); // Replace VOID type with string when the output is a temp table or // local files. 
diff --git ql/src/test/queries/clientpositive/parquet_ctas.q ql/src/test/queries/clientpositive/parquet_ctas.q new file mode 100644 index 0000000..a1a9339 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_ctas.q @@ -0,0 +1,24 @@ +drop table staging; +drop table parquet_ctas; +drop table parquet_ctas_advanced; +drop table parquet_ctas_alias; +drop table parquet_ctas_mixed; + +create table staging (key int, value string) stored as textfile; +insert into table staging select * from srcbucket order by key limit 10; + +create table parquet_ctas stored as parquet as select * from staging; +describe parquet_ctas; +select * from parquet_ctas; + +create table parquet_ctas_advanced stored as parquet as select key+1,concat(value,"value") from staging; +describe parquet_ctas_advanced; +select * from parquet_ctas_advanced; + +create table parquet_ctas_alias stored as parquet as select key+1 as mykey,concat(value,"value") as myvalue from staging; +describe parquet_ctas_alias; +select * from parquet_ctas_alias; + +create table parquet_ctas_mixed stored as parquet as select key,key+1,concat(value,"value") as myvalue from staging; +describe parquet_ctas_mixed; +select * from parquet_ctas_mixed; \ No newline at end of file diff --git ql/src/test/results/clientpositive/ctas.q.out ql/src/test/results/clientpositive/ctas.q.out index 9668855..0bda29a 100644 --- ql/src/test/results/clientpositive/ctas.q.out +++ ql/src/test/results/clientpositive/ctas.q.out @@ -836,7 +836,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string field.delim , line.delim diff --git ql/src/test/results/clientpositive/ctas_hadoop20.q.out ql/src/test/results/clientpositive/ctas_hadoop20.q.out index 0ec0af5..b8e85ec 100644 --- ql/src/test/results/clientpositive/ctas_hadoop20.q.out +++ 
ql/src/test/results/clientpositive/ctas_hadoop20.q.out @@ -836,7 +836,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string field.delim , line.delim diff --git ql/src/test/results/clientpositive/merge3.q.out ql/src/test/results/clientpositive/merge3.q.out index 3df75b7..ddda156 100644 --- ql/src/test/results/clientpositive/merge3.q.out +++ ql/src/test/results/clientpositive/merge3.q.out @@ -111,7 +111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string name default.merge_src2 serialization.format 1 @@ -210,7 +210,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string name default.merge_src2 serialization.format 1 @@ -229,7 +229,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string name default.merge_src2 serialization.format 1 @@ -239,7 +239,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string name default.merge_src2 serialization.format 1 @@ -264,7 +264,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string name default.merge_src2 
serialization.format 1 @@ -283,7 +283,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string name default.merge_src2 serialization.format 1 @@ -293,7 +293,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1 + columns key,value columns.types string:string name default.merge_src2 serialization.format 1 diff --git ql/src/test/results/clientpositive/parquet_ctas.q.out ql/src/test/results/clientpositive/parquet_ctas.q.out new file mode 100644 index 0000000..47a8f28 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_ctas.q.out @@ -0,0 +1,184 @@ +PREHOOK: query: drop table staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table parquet_ctas +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table parquet_ctas +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table parquet_ctas_advanced +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table parquet_ctas_advanced +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table parquet_ctas_alias +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table parquet_ctas_alias +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table parquet_ctas_mixed +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table parquet_ctas_mixed +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table staging (key int, value string) stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table staging (key int, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@staging +PREHOOK: query: insert into table staging select * from srcbucket order by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket +PREHOOK: Output: 
default@staging +POSTHOOK: query: insert into table staging select * from srcbucket order by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket +POSTHOOK: Output: default@staging +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: create table parquet_ctas stored as parquet as select * from staging +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@staging +POSTHOOK: query: create table parquet_ctas stored as parquet as select * from staging +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@parquet_ctas +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe parquet_ctas +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe parquet_ctas +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +key int from deserializer +value string from deserializer +PREHOOK: query: select * from parquet_ctas +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ctas +#### A masked pattern was here #### +POSTHOOK: query: select * from parquet_ctas +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ctas +#### A masked pattern was here #### +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +0 val_0 +0 val_0 +0 val_0 +0 val_1 +0 val_1 +1 val_2 +2 
val_3 +2 val_2 +3 val_4 +4 val_5 +PREHOOK: query: create table parquet_ctas_advanced stored as parquet as select key+1,concat(value,"value") from staging +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@staging +POSTHOOK: query: create table parquet_ctas_advanced stored as parquet as select key+1,concat(value,"value") from staging +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@parquet_ctas_advanced +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe parquet_ctas_advanced +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe parquet_ctas_advanced +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +_c0 int from deserializer +_c1 string from deserializer +PREHOOK: query: select * from parquet_ctas_advanced +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ctas_advanced +#### A masked pattern was here #### +POSTHOOK: query: select * from parquet_ctas_advanced +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ctas_advanced +#### A masked pattern was here #### +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +1 val_0value +1 val_0value +1 val_0value +1 val_1value +1 val_1value +2 val_2value +3 val_3value +3 val_2value +4 val_4value +5 val_5value +PREHOOK: query: create table parquet_ctas_alias stored as parquet as select key+1 as mykey,concat(value,"value") as myvalue from staging +PREHOOK: type: CREATETABLE_AS_SELECT 
+PREHOOK: Input: default@staging +POSTHOOK: query: create table parquet_ctas_alias stored as parquet as select key+1 as mykey,concat(value,"value") as myvalue from staging +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@parquet_ctas_alias +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe parquet_ctas_alias +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe parquet_ctas_alias +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +mykey int from deserializer +myvalue string from deserializer +PREHOOK: query: select * from parquet_ctas_alias +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ctas_alias +#### A masked pattern was here #### +POSTHOOK: query: select * from parquet_ctas_alias +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ctas_alias +#### A masked pattern was here #### +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +1 val_0value +1 val_0value +1 val_0value +1 val_1value +1 val_1value +2 val_2value +3 val_3value +3 val_2value +4 val_4value +5 val_5value +PREHOOK: query: create table parquet_ctas_mixed stored as parquet as select key,key+1,concat(value,"value") as myvalue from staging +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@staging +POSTHOOK: query: create table parquet_ctas_mixed stored as parquet as select key,key+1,concat(value,"value") as myvalue from staging +POSTHOOK: type: 
CREATETABLE_AS_SELECT +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@parquet_ctas_mixed +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: describe parquet_ctas_mixed +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe parquet_ctas_mixed +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +key int from deserializer +_c1 int from deserializer +myvalue string from deserializer +PREHOOK: query: select * from parquet_ctas_mixed +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ctas_mixed +#### A masked pattern was here #### +POSTHOOK: query: select * from parquet_ctas_mixed +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ctas_mixed +#### A masked pattern was here #### +POSTHOOK: Lineage: staging.key SIMPLE [(srcbucket)srcbucket.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: staging.value SIMPLE [(srcbucket)srcbucket.FieldSchema(name:value, type:string, comment:null), ] +0 1 val_0value +0 1 val_0value +0 1 val_0value +0 1 val_1value +0 1 val_1value +1 2 val_2value +2 3 val_3value +2 3 val_2value +3 4 val_4value +4 5 val_5value