From f77b17d1b0687f8545931bc6e92de4ebbf2730c2 Mon Sep 17 00:00:00 2001
From: Nishant
Date: Wed, 28 Mar 2018 12:59:08 +0530
Subject: [PATCH] [HIVE-19049] Add support for Add Columns for Druid

---
 .../hadoop/hive/druid/DruidStorageHandler.java     |  14 ++
 .../apache/hadoop/hive/druid/serde/DruidSerDe.java |   6 +
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java    |   7 +-
 .../apache/hadoop/hive/ql/plan/AlterTableDesc.java |   2 +-
 .../queries/clientpositive/druidmini_test_alter.q  |  52 +++++
 .../druid/druidmini_test_alter.q.out               | 210 +++++++++++++++++++++
 .../apache/hadoop/hive/metastore/HiveMetaHook.java |  27 +++
 .../hadoop/hive/metastore/HiveMetaStoreClient.java |   4 +
 .../hive/metastore/utils/MetaStoreUtils.java       |   6 +-
 9 files changed, 324 insertions(+), 4 deletions(-)
 create mode 100644 ql/src/test/queries/clientpositive/druidmini_test_alter.q
 create mode 100644 ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out

diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
index abc856b40f..eca407ee93 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
@@ -23,6 +23,7 @@
 import com.google.common.base.Supplier;
 import com.google.common.base.Suppliers;
 import com.google.common.base.Throwables;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
@@ -57,6 +58,7 @@
 import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
@@ -103,6 +105,8 @@
 
   private static final HttpClient HTTP_CLIENT;
 
+  private static List<String> allowedAlterTypes = ImmutableList.of("ADDPROPS", "DROPPROPS", "ADDCOLS");
+
   static {
     final Lifecycle lifecycle = new Lifecycle();
     try {
@@ -681,4 +685,14 @@ private static HttpClient makeHttpClient(Lifecycle lifecycle) {
   public static HttpClient getHttpClient() {
     return HTTP_CLIENT;
   }
+
+  @Override
+  public void preAlterTable(Table table, EnvironmentContext context) throws MetaException {
+    String alterOpType = (context == null || context.getProperties() == null) ? null : context.getProperties().get(ALTER_TABLE_OPERATION_TYPE);
+    // alterOpType is null in case of a stats update or a missing context
+    if (alterOpType != null && !allowedAlterTypes.contains(alterOpType)) {
+      throw new MetaException(
+          "ALTER TABLE cannot be used for " + alterOpType + " on a non-native table");
+    }
+  }
 }
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
index 9b8a26e940..d991adb088 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
@@ -43,6 +43,7 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.druid.DruidStorageHandler;
 import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
@@ -486,4 +487,9 @@ public ObjectInspector getObjectInspector() {
     return inspector;
   }
 
+  @Override
+  public boolean shouldStoreFieldsInMetastore(Map<String, String> tableParams) {
+    // If the Druid table is not external, store the schema in the metastore.
+    return !MetaStoreUtils.isExternal(tableParams);
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 314a1868c0..01cadeaaab 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -71,6 +71,7 @@
 import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.PartitionDropOptions;
 import org.apache.hadoop.hive.metastore.StatObjectConverter;
 import org.apache.hadoop.hive.metastore.TableType;
@@ -3822,10 +3823,12 @@ private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException {
     }
 
     try {
+      EnvironmentContext environmentContext = alterTbl.getEnvironmentContext();
+      environmentContext.putToProperties(HiveMetaHook.ALTER_TABLE_OPERATION_TYPE, alterTbl.getOp().name());
       if (allPartitions == null) {
-        db.alterTable(alterTbl.getOldName(), tbl, alterTbl.getIsCascade(), alterTbl.getEnvironmentContext());
+        db.alterTable(alterTbl.getOldName(), tbl, alterTbl.getIsCascade(), environmentContext);
       } else {
-        db.alterPartitions(Warehouse.getQualifiedName(tbl.getTTable()), allPartitions, alterTbl.getEnvironmentContext());
+        db.alterPartitions(Warehouse.getQualifiedName(tbl.getTTable()), allPartitions, environmentContext);
       }
       // Add constraints if necessary
       addConstraints(db, alterTbl);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
index f2d3d33ceb..3f82d16865 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
@@ -73,7 +73,7 @@ public String getName() {
       return name;
     }
     public static final List<AlterTableTypes> nonNativeTableAllowedTypes =
-        ImmutableList.of(ADDPROPS, DROPPROPS);
+        ImmutableList.of(ADDPROPS, DROPPROPS, ADDCOLS);
   }
 
   public static enum ProtectModeType {
diff --git a/ql/src/test/queries/clientpositive/druidmini_test_alter.q b/ql/src/test/queries/clientpositive/druidmini_test_alter.q
new file mode 100644
index 0000000000..15ae952d6a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/druidmini_test_alter.q
@@ -0,0 +1,52 @@
+CREATE TABLE druid_alltypesorc
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+ SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1
+FROM alltypesorc where ctimestamp2 IS NOT NULL;
+
+DESCRIBE druid_alltypesorc;
+
+DESCRIBE extended druid_alltypesorc;
+
+SELECT COUNT(*) FROM druid_alltypesorc;
+
+ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int);
+
+DESCRIBE druid_alltypesorc;
+
+DESCRIBE extended druid_alltypesorc;
+
+SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL;
+
+INSERT INTO TABLE druid_alltypesorc
+ SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1,
+cstring2,
+cboolean2,
+cint
+FROM alltypesorc where ctimestamp1 IS NOT NULL;
+
+
+SELECT COUNT(*) FROM druid_alltypesorc;
+
+SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL;
+
+SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL;
+
+DROP TABLE druid_alltypesorc;
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out b/ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out
new file mode 100644
index 0000000000..9aef045021
--- /dev/null
+++ b/ql/src/test/results/clientpositive/druid/druidmini_test_alter.q.out
@@ -0,0 +1,210 @@
+PREHOOK: query: CREATE TABLE druid_alltypesorc
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+ SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1
+FROM alltypesorc where ctimestamp2 IS NOT NULL
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_alltypesorc
+POSTHOOK: query: CREATE TABLE druid_alltypesorc
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE")
+AS
+ SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1
+FROM alltypesorc where ctimestamp2 IS NOT NULL
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_alltypesorc
+POSTHOOK: Lineage: druid_alltypesorc.__time EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: druid_alltypesorc.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: DESCRIBE druid_alltypesorc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@druid_alltypesorc
+POSTHOOK: query: DESCRIBE druid_alltypesorc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@druid_alltypesorc
+__time	timestamp with local time zone	from deserializer
+cstring1	string	from deserializer
+cdouble	double	from deserializer
+cfloat	float	from deserializer
+ctinyint	tinyint	from deserializer
+csmallint	smallint	from deserializer
+cint	int	from deserializer
+cbigint	bigint	from deserializer
+cboolean1	boolean	from deserializer
+PREHOOK: query: DESCRIBE extended druid_alltypesorc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@druid_alltypesorc
+POSTHOOK: query: DESCRIBE extended druid_alltypesorc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@druid_alltypesorc
+__time	timestamp with local time zone	from deserializer
+cstring1	string	from deserializer
+cdouble	double	from deserializer
+cfloat	float	from deserializer
+ctinyint	tinyint	from deserializer
+csmallint	smallint	from deserializer
+cint	int	from deserializer
+cbigint	bigint	from deserializer
+cboolean1	boolean	from deserializer
+
+#### A masked pattern was here ####
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+3033
+PREHOOK: query: ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: default@druid_alltypesorc
+POSTHOOK: query: ALTER TABLE druid_alltypesorc ADD COLUMNS (cstring2 string, cboolean2 boolean, cint2 int)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: default@druid_alltypesorc
+PREHOOK: query: DESCRIBE druid_alltypesorc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@druid_alltypesorc
+POSTHOOK: query: DESCRIBE druid_alltypesorc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@druid_alltypesorc
+__time	timestamp with local time zone	from deserializer
+cstring1	string	from deserializer
+cdouble	double	from deserializer
+cfloat	float	from deserializer
+ctinyint	tinyint	from deserializer
+csmallint	smallint	from deserializer
+cint	int	from deserializer
+cbigint	bigint	from deserializer
+cboolean1	boolean	from deserializer
+cstring2	string	from deserializer
+cboolean2	boolean	from deserializer
+cint2	int	from deserializer
+PREHOOK: query: DESCRIBE extended druid_alltypesorc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@druid_alltypesorc
+POSTHOOK: query: DESCRIBE extended druid_alltypesorc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@druid_alltypesorc
+__time	timestamp with local time zone	from deserializer
+cstring1	string	from deserializer
+cdouble	double	from deserializer
+cfloat	float	from deserializer
+ctinyint	tinyint	from deserializer
+csmallint	smallint	from deserializer
+cint	int	from deserializer
+cbigint	bigint	from deserializer
+cboolean1	boolean	from deserializer
+cstring2	string	from deserializer
+cboolean2	boolean	from deserializer
+cint2	int	from deserializer
+
+#### A masked pattern was here ####
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+PREHOOK: query: INSERT INTO TABLE druid_alltypesorc
+ SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1,
+cstring2,
+cboolean2,
+cint
+FROM alltypesorc where ctimestamp1 IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@druid_alltypesorc
+POSTHOOK: query: INSERT INTO TABLE druid_alltypesorc
+ SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`,
+cstring1,
+cdouble,
+cfloat,
+ctinyint,
+csmallint,
+cint,
+cbigint,
+cboolean1,
+cstring2,
+cboolean2,
+cint
+FROM alltypesorc where ctimestamp1 IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@druid_alltypesorc
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9138
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+3041
+PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc WHERE cstring2 IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+6097
+PREHOOK: query: DROP TABLE druid_alltypesorc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@druid_alltypesorc
+PREHOOK: Output: default@druid_alltypesorc
+POSTHOOK: query: DROP TABLE druid_alltypesorc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@druid_alltypesorc
+POSTHOOK: Output: default@druid_alltypesorc
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
index 2534fa2212..280075dc87 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
@@ -20,9 +20,14 @@
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
 
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
 /**
  * HiveMetaHook defines notification methods which are invoked as part
  * of transactions against the metastore, allowing external catalogs
@@ -36,6 +41,11 @@
 @InterfaceAudience.Public
 @InterfaceStability.Stable
 public interface HiveMetaHook {
+
+  public String ALTER_TABLE_OPERATION_TYPE = "alterTableOpType";
+
+  public List<String> allowedAlterTypes = ImmutableList.of("ADDPROPS", "DROPPROPS");
+
   /**
    * Called before a new table definition is added to the metastore
    * during CREATE TABLE.
@@ -92,4 +102,21 @@ public void rollbackDropTable(Table table)
    */
   public void commitDropTable(Table table, boolean deleteData)
     throws MetaException;
+
+  /**
+   * Called before a table is altered in the metastore
+   * during ALTER TABLE.
+   * @param table new table definition
+   * @param context environment context, carrying the type of the alter operation
+   */
+  public default void preAlterTable(Table table, EnvironmentContext context) throws MetaException {
+    String alterOpType = (context == null || context.getProperties() == null) ? null : context.getProperties().get(ALTER_TABLE_OPERATION_TYPE);
+    // By default allow only ADDPROPS and DROPPROPS.
+    // alterOpType is null in case of a stats update or a missing context.
+    if (alterOpType != null && !allowedAlterTypes.contains(alterOpType)) {
+      throw new MetaException(
+          "ALTER TABLE cannot be used for " + alterOpType + " on a non-native table");
+    }
+  }
+
 }
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index ae42077297..d4bdcf1a79 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -410,6 +410,10 @@ public void alter_table(String defaultDatabaseName, String tblName, Table table,
   @Override
   public void alter_table_with_environmentContext(String dbname, String tbl_name, Table new_tbl,
       EnvironmentContext envContext) throws InvalidOperationException, MetaException, TException {
+    HiveMetaHook hook = getHook(new_tbl);
+    if (hook != null) {
+      hook.preAlterTable(new_tbl, envContext);
+    }
     client.alter_table_with_environment_context(dbname, tbl_name, new_tbl, envContext);
   }
 
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index e373753cbc..b477ce5c32 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -534,7 +534,11 @@ public static boolean isExternalTable(Table table) {
       return false;
     }
 
-    return "TRUE".equalsIgnoreCase(params.get("EXTERNAL"));
+    return isExternal(params);
+  }
+
+  public static boolean isExternal(Map<String, String> tableParams) {
+    return "TRUE".equalsIgnoreCase(tableParams.get("EXTERNAL"));
   }
 
   // check if stats need to be (re)calculated
-- 
2.11.0 (Apple Git-81)
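
Illustration of the hook contract this patch introduces (appended after the
mail signature, so it is not part of the applied diff): DDLTask stamps the
operation name into the EnvironmentContext, HiveMetaStoreClient invokes the
hook before the alter_table RPC, and a hook that throws aborts the DDL before
any metadata changes. A minimal sketch of a custom HiveMetaHook built on that
contract follows; MyMetaHook is hypothetical, and the operation names are
assumed to match AlterTableDesc.AlterTableTypes as in the patch above.

    import java.util.List;

    import com.google.common.collect.ImmutableList;

    import org.apache.hadoop.hive.metastore.HiveMetaHook;
    import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hadoop.hive.metastore.api.Table;

    public class MyMetaHook implements HiveMetaHook {

      // Alter operations this hook accepts; names follow AlterTableDesc.AlterTableTypes.
      private static final List<String> ALLOWED =
          ImmutableList.of("ADDPROPS", "DROPPROPS", "ADDCOLS");

      @Override
      public void preAlterTable(Table table, EnvironmentContext context) throws MetaException {
        // DDLTask publishes the operation name under ALTER_TABLE_OPERATION_TYPE;
        // it is absent for stats-only updates, which should always be allowed.
        String op = (context == null || context.getProperties() == null)
            ? null : context.getProperties().get(ALTER_TABLE_OPERATION_TYPE);
        if (op != null && !ALLOWED.contains(op)) {
          throw new MetaException("ALTER TABLE " + op + " is not supported by this storage handler");
        }
      }

      // The remaining lifecycle callbacks are no-ops in this sketch.
      @Override public void preCreateTable(Table table) throws MetaException { }
      @Override public void rollbackCreateTable(Table table) throws MetaException { }
      @Override public void commitCreateTable(Table table) throws MetaException { }
      @Override public void preDropTable(Table table) throws MetaException { }
      @Override public void rollbackDropTable(Table table) throws MetaException { }
      @Override public void commitDropTable(Table table, boolean deleteData) throws MetaException { }
    }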