From 211e84271b9122bcbd239cec2e0c3c1b37f56134 Mon Sep 17 00:00:00 2001
From: Nishant
Date: Wed, 28 Mar 2018 00:53:20 +0530
Subject: [PATCH] [HIVE-19049] Add support for Add Columns for Druid

---
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java    |   6 +-
 .../apache/hadoop/hive/ql/plan/AlterTableDesc.java |   2 +-
 .../queries/clientpositive/druidmini_test_alter.q  |  52 ++++++++
 .../druid/druidmini_test_alter.q.out               | 140 +++++++++++++++++++++
 .../apache/hadoop/hive/metastore/HiveMetaHook.java |  11 ++
 .../hadoop/hive/metastore/HiveMetaStoreClient.java |   4 +
 6 files changed, 210 insertions(+), 5 deletions(-)
 create mode 100644 ql/src/test/queries/clientpositive/druidmini_test_alter.q
 create mode 100644 ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 314a1868c0..7336d8397d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -3926,8 +3926,7 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition
       StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
       String serializationLib = sd.getSerdeInfo().getSerializationLib();
       AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
-      List<FieldSchema> oldCols = (part == null
-          ? tbl.getColsForMetastore() : part.getColsForMetastore());
+      List<FieldSchema> oldCols = sd.getCols();
       List<FieldSchema> newCols = alterTbl.getNewCols();
       if (serializationLib.equals(
           "org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
@@ -3956,8 +3955,7 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition
       StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
       String serializationLib = sd.getSerdeInfo().getSerializationLib();
       AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
-      List<FieldSchema> oldCols = (part == null
-          ? tbl.getColsForMetastore() : part.getColsForMetastore());
+      List<FieldSchema> oldCols = sd.getCols();
       List<FieldSchema> newCols = new ArrayList<FieldSchema>();
       Iterator<FieldSchema> iterOldCols = oldCols.iterator();
       String oldName = alterTbl.getOldColName();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
index f2d3d33ceb..3f82d16865 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
@@ -73,7 +73,7 @@ public String getName() {
       return name;
     }
     public static final List<AlterTableTypes> nonNativeTableAllowedTypes =
-        ImmutableList.of(ADDPROPS, DROPPROPS);
+        ImmutableList.of(ADDPROPS, DROPPROPS, ADDCOLS);
   }

   public static enum ProtectModeType {
diff --git a/ql/src/test/queries/clientpositive/druidmini_test_alter.q b/ql/src/test/queries/clientpositive/druidmini_test_alter.q
new file mode 100644
index 0000000000..15ae952d6a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/druidmini_test_alter.q
@@ -0,0 +1,52 @@
+CREATE TABLE druid_alltypesorc
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+  SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1
+FROM alltypesorc where ctimestamp2 IS NOT NULL;
+
+DESCRIBE druid_alltypesorc;
+
+DESCRIBE extended druid_alltypesorc;
+
+SELECT COUNT(*) FROM druid_alltypesorc;
+
+ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int);
+
+DESCRIBE druid_alltypesorc;
+
+DESCRIBE extended druid_alltypesorc;
+
+SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL;
+
+INSERT INTO TABLE druid_alltypesorc
+  SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1,
+cstring2,
+cboolean2,
+cint
+FROM alltypesorc where ctimestamp1 IS NOT NULL;
+
+
+SELECT COUNT(*) FROM druid_alltypesorc;
+
+SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL;
+
+SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL;
+
+DROP TABLE druid_alltypesorc;
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out
new file mode 100644
index 0000000000..f3c87f0f31
--- /dev/null
+++ b/ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out
@@ -0,0 +1,140 @@
+PREHOOK: query: CREATE TABLE druid_alltypesorc
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+  SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1
+FROM alltypesorc where ctimestamp2 IS NOT NULL
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_alltypesorc
+POSTHOOK: query: CREATE TABLE druid_alltypesorc
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+  SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1
+FROM alltypesorc where ctimestamp2 IS NOT NULL
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_alltypesorc
+POSTHOOK: Lineage: druid_alltypesorc.__time EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+3033
+PREHOOK: query: ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: default@druid_alltypesorc
+POSTHOOK: query: ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: default@druid_alltypesorc
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+PREHOOK: query: INSERT INTO TABLE druid_alltypesorc
+  SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1,
+cstring2,
+cboolean2,
+cint
+FROM alltypesorc where ctimestamp1 IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@druid_alltypesorc
+POSTHOOK: query: INSERT INTO TABLE druid_alltypesorc
+  SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1,
+cstring2,
+cboolean2,
+cint
+FROM alltypesorc where ctimestamp1 IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@druid_alltypesorc
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9138
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+3041
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+6097
+PREHOOK: query: DROP TABLE druid_alltypesorc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: default@druid_alltypesorc
+POSTHOOK: query: DROP TABLE druid_alltypesorc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: default@druid_alltypesorc
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
index 2534fa2212..08c75531b4 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
@@ -92,4 +92,15 @@ public void rollbackDropTable(Table table)
    */
   public void commitDropTable(Table table, boolean deleteData)
     throws MetaException;
+
+  /**
+   * Called before a table is altered in the metastore
+   * during ALTER TABLE.
+   *
+   * @param table new table definition
+   */
+  public default void preAlterTable(Table table) {
+    // default no-op implementation
+  }
+
 }
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index ae42077297..133d2f746f 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -410,6 +410,10 @@ public void alter_table(String defaultDatabaseName, String tblName, Table table,
   @Override
   public void alter_table_with_environmentContext(String dbname, String tbl_name, Table new_tbl,
       EnvironmentContext envContext) throws InvalidOperationException, MetaException, TException {
+    HiveMetaHook hook = getHook(new_tbl);
+    if (hook != null) {
+      hook.preAlterTable(new_tbl);
+    }
     client.alter_table_with_environment_context(dbname, tbl_name, new_tbl, envContext);
   }
 
-- 
2.11.0 (Apple Git-81)
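
Illustration, not part of the patch: a minimal sketch of how a storage handler's metahook could override the new preAlterTable() default method introduced above. Only the HiveMetaHook method signatures come from the interface shown in the diff; the class name ExampleMetaHook and the "reject binary columns" rule are hypothetical. HiveMetaStoreClient.alter_table_with_environmentContext() calls the hook (when getHook() returns one) before the altered definition reaches the metastore, so an implementation can inspect or validate the new column list at that point.

// Hypothetical example only; signatures follow the HiveMetaHook interface above.
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;

public class ExampleMetaHook implements HiveMetaHook {

  @Override
  public void preCreateTable(Table table) throws MetaException {
    // no-op for this sketch
  }

  @Override
  public void rollbackCreateTable(Table table) throws MetaException {
    // no-op for this sketch
  }

  @Override
  public void commitCreateTable(Table table) throws MetaException {
    // no-op for this sketch
  }

  @Override
  public void preDropTable(Table table) throws MetaException {
    // no-op for this sketch
  }

  @Override
  public void rollbackDropTable(Table table) throws MetaException {
    // no-op for this sketch
  }

  @Override
  public void commitDropTable(Table table, boolean deleteData) throws MetaException {
    // no-op for this sketch
  }

  @Override
  public void preAlterTable(Table table) {
    // Invoked before the altered definition is persisted, so the handler sees the
    // full new column list (existing columns plus those added by ADD COLUMNS).
    for (FieldSchema col : table.getSd().getCols()) {
      // Hypothetical rule: reject a column type this handler cannot ingest.
      if ("binary".equalsIgnoreCase(col.getType())) {
        throw new UnsupportedOperationException(
            "Column " + col.getName() + " has unsupported type " + col.getType());
      }
    }
  }
}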