diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 023dbb9b1d..a70902ba1a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -83,6 +83,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   autoColumnStats_1.q,\
   autoColumnStats_10.q,\
   autoColumnStats_2.q,\
+  avro_extschema_insert.q,\
   bucket2.q,\
   bucket3.q,\
   bucket4.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 9ad4e71482..27cac216ef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -681,7 +681,8 @@ private void dpSetup() {
     assert inputObjInspectors.length == 1 : "FileSinkOperator should have 1 parent, but it has "
         + inputObjInspectors.length;
     StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[0];
-    this.dpStartCol = Utilities.getDPColOffset(conf);
+    // Using the assumption that soi contains all columns and dynamic partitioning columns are last.
+    this.dpStartCol = soi.getAllStructFieldRefs().size() - numDynParts;
     this.subSetOI = new SubStructObjectInspector(soi, 0, this.dpStartCol);
     this.dpVals = new ArrayList<String>(numDynParts);
     this.dpWritables = new ArrayList<Object>(numDynParts);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 3deba27dd6..7de00656be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2093,6 +2093,12 @@ public static String formatBinaryString(byte[] array, int start, int length) {
     return names;
   }
 
+  /**
+   * Note: This will not return the correct number of columns in the case of
+   * Avro serde using an external schema URL.
+   * @param props TableDesc properties
+   * @return list of column names based on the table properties
+   */
   public static List<String> getColumnNames(Properties props) {
     List<String> names = new ArrayList<String>();
     String colNames = props.getProperty(serdeConstants.LIST_COLUMNS);
@@ -3895,22 +3901,6 @@ public static String jarFinderGetJar(Class klass) {
     return null;
   }
 
-  public static int getDPColOffset(FileSinkDesc conf) {
-
-    if (conf.getWriteType() == AcidUtils.Operation.DELETE) {
-      // For deletes, there is only ROW__ID in non-partitioning, non-bucketing columns.
-      //See : UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
-      return 1;
-    } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE) {
-      // For updates, ROW__ID is an extra column at index 0.
-      //See : UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
-      return getColumnNames(conf.getTableInfo().getProperties()).size() + 1;
-    } else {
-      return getColumnNames(conf.getTableInfo().getProperties()).size();
-    }
-
-  }
-
   public static List<String> getStatsTmpDirs(BaseWork work, Configuration conf) {
 
     List<String> statsTmpDirs = new ArrayList<>();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index 55eb9d8928..eb1c1983b9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -1274,8 +1274,9 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object..
       Map<ColumnInfo, ExprNodeDesc> parentConstants = cppCtx.getPropagatedConstants(parent);
       RowSchema rs = parent.getSchema();
       boolean allConstant = true;
-      int dpColStartIdx = Utilities.getDPColOffset(fsdesc);
       List<ColumnInfo> colInfos = rs.getSignature();
+      // Using the assumption that colInfos contains all columns and dynamic partitioning columns are last.
+      int dpColStartIdx = colInfos.size() - dpCtx.getNumDPCols();
       for (int i = dpColStartIdx; i < colInfos.size(); i++) {
         ColumnInfo ci = colInfos.get(i);
         if (parentConstants.get(ci) == null) {
diff --git a/ql/src/test/queries/clientpositive/avro_extschema_insert.q b/ql/src/test/queries/clientpositive/avro_extschema_insert.q
new file mode 100644
index 0000000000..c1980713b8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avro_extschema_insert.q
@@ -0,0 +1,20 @@
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/;
+
+create external table avro_extschema_insert1 (name string) partitioned by (p1 string)
+  stored as avro tblproperties ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc');
+
+describe avro_extschema_insert1;
+
+create external table avro_extschema_insert2 like avro_extschema_insert1;
+
+insert overwrite table avro_extschema_insert1 partition (p1='part1') values ('col1_value', 1, 'col3_value');
+
+insert overwrite table avro_extschema_insert2 partition (p1) select * from avro_extschema_insert1;
+select * from avro_extschema_insert2;
+
+dfs -rm ${system:test.tmp.dir}/table1.avsc;
+
+drop table avro_extschema_insert1;
+drop table avro_extschema_insert2;
diff --git a/ql/src/test/results/clientpositive/llap/avro_extschema_insert.q.out b/ql/src/test/results/clientpositive/llap/avro_extschema_insert.q.out
new file mode 100644
index 0000000000..2976ee3290
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/avro_extschema_insert.q.out
@@ -0,0 +1,84 @@
+PREHOOK: query: create external table avro_extschema_insert1 (name string) partitioned by (p1 string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_insert1
+POSTHOOK: query: create external table avro_extschema_insert1 (name string) partitioned by (p1 string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_insert1
+PREHOOK: query: describe avro_extschema_insert1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_insert1
+POSTHOOK: query: describe avro_extschema_insert1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_insert1
+col1	string
+col2	bigint
+col3	string
+p1	string
+
+# Partition Information
+# col_name	data_type	comment
+p1	string
+PREHOOK: query: create external table avro_extschema_insert2 like avro_extschema_insert1
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_insert2
+POSTHOOK: query: create external table avro_extschema_insert2 like avro_extschema_insert1
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_insert2
+PREHOOK: query: insert overwrite table avro_extschema_insert1 partition (p1='part1') values ('col1_value', 1, 'col3_value')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_insert1@p1=part1
+POSTHOOK: query: insert overwrite table avro_extschema_insert1 partition (p1='part1') values ('col1_value', 1, 'col3_value')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_insert1@p1=part1
+POSTHOOK: Lineage: avro_extschema_insert1 PARTITION(p1=part1).col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_insert1 PARTITION(p1=part1).col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_insert1 PARTITION(p1=part1).col3 SCRIPT []
+PREHOOK: query: insert overwrite table avro_extschema_insert2 partition (p1) select * from avro_extschema_insert1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_insert1
+PREHOOK: Input: default@avro_extschema_insert1@p1=part1
+PREHOOK: Output: default@avro_extschema_insert2
+POSTHOOK: query: insert overwrite table avro_extschema_insert2 partition (p1) select * from avro_extschema_insert1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_insert1
+POSTHOOK: Input: default@avro_extschema_insert1@p1=part1
+POSTHOOK: Output: default@avro_extschema_insert2@p1=part1
+POSTHOOK: Lineage: avro_extschema_insert2 PARTITION(p1=part1).col1 SIMPLE [(avro_extschema_insert1)avro_extschema_insert1.FieldSchema(name:col1, type:string, comment:), ]
+POSTHOOK: Lineage: avro_extschema_insert2 PARTITION(p1=part1).col2 SIMPLE [(avro_extschema_insert1)avro_extschema_insert1.FieldSchema(name:col2, type:bigint, comment:), ]
+POSTHOOK: Lineage: avro_extschema_insert2 PARTITION(p1=part1).col3 SIMPLE [(avro_extschema_insert1)avro_extschema_insert1.FieldSchema(name:col3, type:string, comment:), ]
+PREHOOK: query: select * from avro_extschema_insert2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_insert2
+PREHOOK: Input: default@avro_extschema_insert2@p1=part1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from avro_extschema_insert2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_insert2
+POSTHOOK: Input: default@avro_extschema_insert2@p1=part1
+#### A masked pattern was here ####
+col1_value	1	col3_value	part1
+#### A masked pattern was here ####
+PREHOOK: query: drop table avro_extschema_insert1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@avro_extschema_insert1
+PREHOOK: Output: default@avro_extschema_insert1
+POSTHOOK: query: drop table avro_extschema_insert1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@avro_extschema_insert1
+POSTHOOK: Output: default@avro_extschema_insert1
+PREHOOK: query: drop table avro_extschema_insert2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@avro_extschema_insert2
+PREHOOK: Output: default@avro_extschema_insert2
+POSTHOOK: query: drop table avro_extschema_insert2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@avro_extschema_insert2
+POSTHOOK: Output: default@avro_extschema_insert2