diff --git metastore/scripts/upgrade/derby/012-HIVE-1362.derby.sql metastore/scripts/upgrade/derby/012-HIVE-1362.derby.sql new file mode 100644 index 0000000..3ade7a3 --- /dev/null +++ metastore/scripts/upgrade/derby/012-HIVE-1362.derby.sql @@ -0,0 +1,46 @@ +CREATE TABLE TAB_COL_STATS( +DB_NAME VARCHAR(128) NOT NULL, +TABLE_NAME VARCHAR(128) NOT NULL, +COLUMN_NAME VARCHAR(128) NOT NULL, +COLUMN_TYPE VARCHAR(128) NOT NULL, +LONG_LOW_VALUE BIGINT, +LONG_HIGH_VALUE BIGINT, +DOUBLE_LOW_VALUE DOUBLE, +DOUBLE_HIGH_VALUE DOUBLE, +NUM_DISTINCTS BIGINT, +NUM_NULLS BIGINT NOT NULL, +AVG_COL_LEN DOUBLE, +MAX_COL_LEN BIGINT, +NUM_TRUES BIGINT, +NUM_FALSES BIGINT, +LAST_ANALYZED BIGINT, +CS_ID BIGINT NOT NULL, +TBL_ID BIGINT NOT NULL +); + +ALTER TABLE TAB_COL_STATS ADD CONSTRAINT "TAB_COL_STATS_PK" PRIMARY KEY ("CS_ID"); +ALTER TABLE TAB_COL_STATS ADD CONSTRAINT "TAB_COL_STATS_FK" FOREIGN KEY ("TBL_ID") REFERENCES TBLS("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +CREATE TABLE PART_COL_STATS( +DB_NAME VARCHAR(128) NOT NULL, +TABLE_NAME VARCHAR(128) NOT NULL, +PARTITION_NAME VARCHAR(767) NOT NULL, +COLUMN_NAME VARCHAR(128) NOT NULL, +COLUMN_TYPE VARCHAR(128) NOT NULL, +LONG_LOW_VALUE BIGINT, +LONG_HIGH_VALUE BIGINT, +DOUBLE_LOW_VALUE DOUBLE, +DOUBLE_HIGH_VALUE DOUBLE, +NUM_DISTINCTS BIGINT, +NUM_NULLS BIGINT NOT NULL, +AVG_COL_LEN DOUBLE, +MAX_COL_LEN BIGINT, +NUM_TRUES BIGINT, +NUM_FALSES BIGINT, +LAST_ANALYZED BIGINT, +CS_ID BIGINT NOT NULL, +PART_ID BIGINT NOT NULL +); + +ALTER TABLE PART_COL_STATS ADD CONSTRAINT "PART_COL_STATS_PK" PRIMARY KEY ("CS_ID"); +ALTER TABLE PART_COL_STATS ADD CONSTRAINT "PART_COL_STATS_FK" FOREIGN KEY ("PART_ID") REFERENCES PARTITIONS("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; diff --git metastore/scripts/upgrade/derby/hive-schema-0.10.0.derby.sql metastore/scripts/upgrade/derby/hive-schema-0.10.0.derby.sql index 1be707e..a46fc49 100644 --- metastore/scripts/upgrade/derby/hive-schema-0.10.0.derby.sql +++ 
metastore/scripts/upgrade/derby/hive-schema-0.10.0.derby.sql @@ -90,6 +90,10 @@ CREATE TABLE "APP"."SKEWED_COL_VALUE_LOCATION_MAPPING" ("SD_ID" BIGINT NOT NULL, CREATE TABLE "APP"."SKEWED_VALUES" ("SD_ID_OID" BIGINT NOT NULL, "STRING_LIST_ID_EID" BIGINT NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); +CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL); + +CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL); + -- ---------------------------------------------- -- DDL Statements for indexes -- ---------------------------------------------- @@ -199,6 +203,10 @@ ALTER TABLE "APP"."SKEWED_COL_VALUE_LOCATION_MAPPING" ADD CONSTRAINT "SKEWED_COL ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_PK" PRIMARY KEY ("SD_ID_OID", "INTEGER_IDX"); +ALTER TABLE "APP"."TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_PK" PRIMARY KEY ("CS_ID"); + +ALTER TABLE "APP"."PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_PK" PRIMARY KEY ("CS_ID"); + -- foreign ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_FK1" FOREIGN KEY ("ORIG_TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON 
DELETE NO ACTION ON UPDATE NO ACTION; @@ -268,6 +276,10 @@ ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_FK1" FOREIGN KEY ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_FK2" FOREIGN KEY ("STRING_LIST_ID_EID") REFERENCES "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; +ALTER TABLE "APP"."TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_FK" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_FK" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + -- ---------------------------------------------- -- DDL Statements for checks -- ---------------------------------------------- diff --git metastore/scripts/upgrade/derby/upgrade-0.9.0-to-0.10.0.derby.sql metastore/scripts/upgrade/derby/upgrade-0.9.0-to-0.10.0.derby.sql index 714e9d9..e41a7c2 100644 --- metastore/scripts/upgrade/derby/upgrade-0.9.0-to-0.10.0.derby.sql +++ metastore/scripts/upgrade/derby/upgrade-0.9.0-to-0.10.0.derby.sql @@ -1,3 +1,4 @@ -- Upgrade MetaStore schema from 0.9.0 to 0.10.0 RUN '010-HIVE-3072.derby.sql'; RUN '010-HIVE-3649.derby.sql'; +RUN '012-HIVE-1362.derby.sql'; diff --git metastore/scripts/upgrade/mysql/012-HIVE-1362.mysql.sql metastore/scripts/upgrade/mysql/012-HIVE-1362.mysql.sql new file mode 100644 index 0000000..3b96370 --- /dev/null +++ metastore/scripts/upgrade/mysql/012-HIVE-1362.mysql.sql @@ -0,0 +1,46 @@ +SELECT '< HIVE-1362 Column Statistics Support in Hive >' AS ' '; + +CREATE TABLE IF NOT EXISTS `TAB_COL_STATS` ( + `CS_ID` bigint(20) NOT NULL, + `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `TBL_ID` 
bigint(20) NOT NULL, + `LONG_LOW_VALUE` bigint(20), + `LONG_HIGH_VALUE` bigint(20), + `DOUBLE_LOW_VALUE` double(53,4), + `DOUBLE_HIGH_VALUE` double(53,4), + `NUM_NULLS` bigint(20) NOT NULL, + `NUM_DISTINCTS` bigint(20), + `AVG_COL_LEN` double(53,4), + `MAX_COL_LEN` bigint(20), + `NUM_TRUES` bigint(20), + `NUM_FALSES` bigint(20), + `LAST_ANALYZED` bigint(20) NOT NULL, + PRIMARY KEY (`CS_ID`), + CONSTRAINT `TAB_COL_STATS_FK` FOREIGN KEY (`TBL_ID`) REFERENCES `TBLS` (`TBL_ID`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +CREATE TABLE IF NOT EXISTS `PART_COL_STATS` ( + `CS_ID` bigint(20) NOT NULL, + `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `PART_ID` bigint(20) NOT NULL, + `LONG_LOW_VALUE` bigint(20), + `LONG_HIGH_VALUE` bigint(20), + `DOUBLE_LOW_VALUE` double(53,4), + `DOUBLE_HIGH_VALUE` double(53,4), + `NUM_NULLS` bigint(20) NOT NULL, + `NUM_DISTINCTS` bigint(20), + `AVG_COL_LEN` double(53,4), + `MAX_COL_LEN` bigint(20), + `NUM_TRUES` bigint(20), + `NUM_FALSES` bigint(20), + `LAST_ANALYZED` bigint(20) NOT NULL, + PRIMARY KEY (`CS_ID`), + CONSTRAINT `PART_COL_STATS_FK` FOREIGN KEY (`PART_ID`) REFERENCES `PARTITIONS` (`PART_ID`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; \ No newline at end of file diff --git metastore/scripts/upgrade/mysql/hive-schema-0.10.0.mysql.sql metastore/scripts/upgrade/mysql/hive-schema-0.10.0.mysql.sql index 97de3db..2e3fcaf 100644 --- metastore/scripts/upgrade/mysql/hive-schema-0.10.0.mysql.sql +++ metastore/scripts/upgrade/mysql/hive-schema-0.10.0.mysql.sql @@ -648,6 +648,57 @@ CREATE TABLE IF NOT EXISTS `TBL_PRIVS` ( /*!40101 SET character_set_client = @saved_cs_client */; -- +-- Table structure 
for table `TAB_COL_STATS` +-- +CREATE TABLE IF NOT EXISTS `TAB_COL_STATS` ( + `CS_ID` bigint(20) NOT NULL, + `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `TBL_ID` bigint(20) NOT NULL, + `LONG_LOW_VALUE` bigint(20), + `LONG_HIGH_VALUE` bigint(20), + `DOUBLE_HIGH_VALUE` double(53,4), + `DOUBLE_LOW_VALUE` double(53,4), + `NUM_NULLS` bigint(20) NOT NULL, + `NUM_DISTINCTS` bigint(20), + `AVG_COL_LEN` double(53,4), + `MAX_COL_LEN` bigint(20), + `NUM_TRUES` bigint(20), + `NUM_FALSES` bigint(20), + `LAST_ANALYZED` bigint(20) NOT NULL, + PRIMARY KEY (`CS_ID`), + CONSTRAINT `TAB_COL_STATS_FK` FOREIGN KEY (`TBL_ID`) REFERENCES `TBLS` (`TBL_ID`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +-- +-- Table structure for table `PART_COL_STATS` +-- +CREATE TABLE IF NOT EXISTS `PART_COL_STATS` ( + `CS_ID` bigint(20) NOT NULL, + `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL, + `PART_ID` bigint(20) NOT NULL, + `LONG_LOW_VALUE` bigint(20), + `LONG_HIGH_VALUE` bigint(20), + `DOUBLE_HIGH_VALUE` double(53,4), + `DOUBLE_LOW_VALUE` double(53,4), + `NUM_NULLS` bigint(20) NOT NULL, + `NUM_DISTINCTS` bigint(20), + `AVG_COL_LEN` double(53,4), + `MAX_COL_LEN` bigint(20), + `NUM_TRUES` bigint(20), + `NUM_FALSES` bigint(20), + `LAST_ANALYZED` bigint(20) NOT NULL, + PRIMARY KEY (`CS_ID`), + CONSTRAINT `PART_COL_STATS_FK` FOREIGN KEY (`PART_ID`) REFERENCES `PARTITIONS` (`PART_ID`) +) ENGINE=InnoDB 
DEFAULT CHARSET=latin1; + +-- -- Table structure for table `TYPES` -- diff --git metastore/scripts/upgrade/mysql/upgrade-0.9.0-to-0.10.0.mysql.sql metastore/scripts/upgrade/mysql/upgrade-0.9.0-to-0.10.0.mysql.sql index 1a85081..1fcd20b 100644 --- metastore/scripts/upgrade/mysql/upgrade-0.9.0-to-0.10.0.mysql.sql +++ metastore/scripts/upgrade/mysql/upgrade-0.9.0-to-0.10.0.mysql.sql @@ -1,4 +1,5 @@ SELECT 'Upgrading MetaStore schema from 0.9.0 to 0.10.0' AS ' '; SOURCE 010-HIVE-3072.mysql.sql; SOURCE 010-HIVE-3649.mysql.sql; +SOURCE 012-HIVE-1362.mysql.sql; SELECT 'Finished upgrading MetaStore schema from 0.9.0 to 0.10.0' AS ' '; diff --git metastore/scripts/upgrade/oracle/012-HIVE-1362.oracle.sql metastore/scripts/upgrade/oracle/012-HIVE-1362.oracle.sql new file mode 100644 index 0000000..67bd76f --- /dev/null +++ metastore/scripts/upgrade/oracle/012-HIVE-1362.oracle.sql @@ -0,0 +1,48 @@ +CREATE TABLE TAB_COL_STATS ( + CS_ID NUMBER NOT NULL, + DB_NAME VARCHAR2(128) NOT NULL, + TABLE_NAME VARCHAR2(128) NOT NULL, + COLUMN_NAME VARCHAR2(128) NOT NULL, + COLUMN_TYPE VARCHAR2(128) NOT NULL, + TBL_ID NUMBER NOT NULL, + LONG_LOW_VALUE NUMBER, + LONG_HIGH_VALUE NUMBER, + DOUBLE_LOW_VALUE NUMBER, + DOUBLE_HIGH_VALUE NUMBER, + NUM_NULLS NUMBER NOT NULL, + NUM_DISTINCTS NUMBER, + AVG_COL_LEN NUMBER, + MAX_COL_LEN NUMBER, + NUM_TRUES NUMBER, + NUM_FALSES NUMBER, + LAST_ANALYZED NUMBER NOT NULL +); + +ALTER TABLE TAB_COL_STATS ADD CONSTRAINT TAB_COL_STATS_PK PRIMARY KEY (CS_ID); +ALTER TABLE TAB_COL_STATS ADD CONSTRAINT TAB_COL_STATS_FK FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) INITIALLY DEFERRED ; +CREATE INDEX TAB_COL_STATS_N49 ON TAB_COL_STATS(TBL_ID); + +CREATE TABLE PART_COL_STATS ( + CS_ID NUMBER NOT NULL, + DB_NAME VARCHAR2(128) NOT NULL, + TABLE_NAME VARCHAR2(128) NOT NULL, + PART_NAME VARCHAR2(767) NOT NULL, + COLUMN_NAME VARCHAR2(128) NOT NULL, + COLUMN_TYPE VARCHAR2(128) NOT NULL, + PART_ID NUMBER NOT NULL, + LONG_LOW_VALUE NUMBER, + LONG_HIGH_VALUE NUMBER, + 
DOUBLE_LOW_VALUE NUMBER, + DOUBLE_HIGH_VALUE NUMBER, + NUM_NULLS NUMBER NOT NULL, + NUM_DISTINCTS NUMBER, + AVG_COL_LEN NUMBER, + MAX_COL_LEN NUMBER, + NUM_TRUES NUMBER, + NUM_FALSES NUMBER, + LAST_ANALYZED NUMBER NOT NULL +); + +ALTER TABLE PART_COL_STATS ADD CONSTRAINT PART_COL_STATS_PK PRIMARY KEY (CS_ID); +ALTER TABLE PART_COL_STATS ADD CONSTRAINT PART_COL_STATS_FK FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) INITIALLY DEFERRED; +CREATE INDEX PART_COL_STATS_N49 ON PART_COL_STATS (PART_ID); diff --git metastore/scripts/upgrade/oracle/hive-schema-0.10.0.oracle.sql metastore/scripts/upgrade/oracle/hive-schema-0.10.0.oracle.sql index 029b931..8e2cdc2 100644 --- metastore/scripts/upgrade/oracle/hive-schema-0.10.0.oracle.sql +++ metastore/scripts/upgrade/oracle/hive-schema-0.10.0.oracle.sql @@ -1,5 +1,3 @@ - - -- Table SEQUENCE_TABLE is an internal table required by DataNucleus. -- NOTE: Some versions of SchemaTool do not automatically generate this table. -- See http://www.datanucleus.org/servlet/jira/browse/NUCRDBMS-416 @@ -449,6 +447,61 @@ ALTER TABLE SKEWED_VALUES ADD CONSTRAINT SKEWED_VALUES_FK1 FOREIGN KEY (STRING_L ALTER TABLE SKEWED_VALUES ADD CONSTRAINT SKEWED_VALUES_FK2 FOREIGN KEY (SD_ID_OID) REFERENCES SDS (SD_ID) INITIALLY DEFERRED ; +-- column statistics + +CREATE TABLE TAB_COL_STATS ( + CS_ID NUMBER NOT NULL, + DB_NAME VARCHAR2(128) NOT NULL, + TABLE_NAME VARCHAR2(128) NOT NULL, + COLUMN_NAME VARCHAR2(128) NOT NULL, + COLUMN_TYPE VARCHAR2(128) NOT NULL, + TBL_ID NUMBER NOT NULL, + LONG_LOW_VALUE NUMBER, + LONG_HIGH_VALUE NUMBER, + DOUBLE_LOW_VALUE NUMBER, + DOUBLE_HIGH_VALUE NUMBER, + NUM_NULLS NUMBER NOT NULL, + NUM_DISTINCTS NUMBER, + AVG_COL_LEN NUMBER, + MAX_COL_LEN NUMBER, + NUM_TRUES NUMBER, + NUM_FALSES NUMBER, + LAST_ANALYZED NUMBER NOT NULL +); + +ALTER TABLE TAB_COL_STATS ADD CONSTRAINT TAB_COL_STATS_PKEY PRIMARY KEY (CS_ID); + +ALTER TABLE TAB_COL_STATS ADD CONSTRAINT TAB_COL_STATS_FK FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) 
INITIALLY DEFERRED ; + +CREATE INDEX TAB_COL_STATS_N49 ON TAB_COL_STATS(TBL_ID); + +CREATE TABLE PART_COL_STATS ( + CS_ID NUMBER NOT NULL, + DB_NAME VARCHAR2(128) NOT NULL, + TABLE_NAME VARCHAR2(128) NOT NULL, + PART_NAME VARCHAR2(767) NOT NULL, + COLUMN_NAME VARCHAR2(128) NOT NULL, + COLUMN_TYPE VARCHAR2(128) NOT NULL, + PART_ID NUMBER NOT NULL, + LONG_LOW_VALUE NUMBER, + LONG_HIGH_VALUE NUMBER, + DOUBLE_LOW_VALUE NUMBER, + DOUBLE_HIGH_VALUE NUMBER, + NUM_NULLS NUMBER NOT NULL, + NUM_DISTINCTS NUMBER, + AVG_COL_LEN NUMBER, + MAX_COL_LEN NUMBER, + NUM_TRUES NUMBER, + NUM_FALSES NUMBER, + LAST_ANALYZED NUMBER NOT NULL +); + +ALTER TABLE PART_COL_STATS ADD CONSTRAINT PART_COL_STATS_PKEY PRIMARY KEY (CS_ID); + +ALTER TABLE PART_COL_STATS ADD CONSTRAINT PART_COL_STATS_FK FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) INITIALLY DEFERRED; + +CREATE INDEX PART_COL_STATS_N49 ON PART_COL_STATS (PART_ID); + -- Constraints for table PART_COL_PRIVS for class(es) [org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege] ALTER TABLE PART_COL_PRIVS ADD CONSTRAINT PART_COL_PRIVS_FK1 FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) INITIALLY DEFERRED ; @@ -486,7 +539,7 @@ CREATE UNIQUE INDEX UNIQUE_TYPE ON TYPES (TYPE_NAME); -- Constraints for table PARTITION_KEYS -ALTER TABLE PARTITION_KEYS ADD CONSTRAINT PARTITION_KEYS_FK1 FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) INITIALLY DEFERRED ; +ALTER TABLE PARTITION_KEYS ADD CONSTRAINT PARTITION_KEYS_FK1 FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID) INITIALLY DEFERRED ; CREATE INDEX PARTITION_KEYS_N49 ON PARTITION_KEYS (TBL_ID); diff --git metastore/scripts/upgrade/postgres/012-HIVE-1362.postgres.sql metastore/scripts/upgrade/postgres/012-HIVE-1362.postgres.sql new file mode 100644 index 0000000..87b699d --- /dev/null +++ metastore/scripts/upgrade/postgres/012-HIVE-1362.postgres.sql @@ -0,0 +1,51 @@ +SELECT '< HIVE-1362 Column Statistics Support in Hive >'; + +CREATE TABLE "TAB_COL_STATS" ( + "CS_ID" bigint NOT 
NULL, + "DB_NAME" character varying(128) DEFAULT NULL::character varying, + "TABLE_NAME" character varying(128) DEFAULT NULL::character varying, + "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying, + "COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying, + "TBL_ID" bigint NOT NULL, + "LONG_LOW_VALUE" bigint, + "LONG_HIGH_VALUE" bigint, + "DOUBLE_LOW_VALUE" double precision, + "DOUBLE_HIGH_VALUE" double precision, + "NUM_NULLS" bigint NOT NULL, + "NUM_DISTINCTS" bigint, + "AVG_COL_LEN" double precision, + "MAX_COL_LEN" bigint, + "NUM_TRUES" bigint, + "NUM_FALSES" bigint, + "LAST_ANALYZED" bigint NOT NULL +); + + +ALTER TABLE ONLY "TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_pkey" PRIMARY KEY("CS_ID"); +ALTER TABLE ONLY "TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_fkey" FOREIGN KEY("TBL_ID") REFERENCES "TBLS"("TBL_ID") DEFERRABLE; +CREATE INDEX "TAB_COL_STATS_N49" ON "TAB_COL_STATS" USING btree ("TBL_ID"); + +CREATE TABLE "PART_COL_STATS" ( + "CS_ID" bigint NOT NULL, + "DB_NAME" character varying(128) DEFAULT NULL::character varying, + "TABLE_NAME" character varying(128) DEFAULT NULL::character varying, + "PART_NAME" character varying(767) DEFAULT NULL::character varying, + "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying, + "COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying, + "PART_ID" bigint NOT NULL, + "LONG_LOW_VALUE" bigint, + "LONG_HIGH_VALUE" bigint, + "DOUBLE_LOW_VALUE" double precision, + "DOUBLE_HIGH_VALUE" double precision, + "NUM_NULLS" bigint NOT NULL, + "NUM_DISTINCTS" bigint, + "AVG_COL_LEN" double precision, + "MAX_COL_LEN" bigint, + "NUM_TRUES" bigint, + "NUM_FALSES" bigint, + "LAST_ANALYZED" bigint NOT NULL +); + +ALTER TABLE ONLY "PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_pkey" PRIMARY KEY("CS_ID"); +ALTER TABLE ONLY "PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_fkey" FOREIGN KEY("PART_ID") REFERENCES "PARTITIONS"("PART_ID") DEFERRABLE; +CREATE INDEX 
"PART_COL_STATS_N49" ON "PART_COL_STATS" USING btree ("PART_ID"); \ No newline at end of file diff --git metastore/scripts/upgrade/postgres/hive-schema-0.10.0.postgres.sql metastore/scripts/upgrade/postgres/hive-schema-0.10.0.postgres.sql index 2f61644..1e1ceb9 100644 --- metastore/scripts/upgrade/postgres/hive-schema-0.10.0.postgres.sql +++ metastore/scripts/upgrade/postgres/hive-schema-0.10.0.postgres.sql @@ -475,6 +475,54 @@ CREATE TABLE "SKEWED_VALUES" ( ); +-- +-- Name: TAB_COL_STATS Type: TABLE; Schema: public; Owner: hiveuser; Tablespace: +-- + +CREATE TABLE "TAB_COL_STATS" ( + "CS_ID" bigint NOT NULL, + "DB_NAME" character varying(128) DEFAULT NULL::character varying, + "TABLE_NAME" character varying(128) DEFAULT NULL::character varying, + "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying, + "COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying, + "TBL_ID" bigint NOT NULL, + "LONG_LOW_VALUE" bigint, + "LONG_HIGH_VALUE" bigint, + "DOUBLE_LOW_VALUE" double precision, + "DOUBLE_HIGH_VALUE" double precision, + "NUM_NULLS" bigint NOT NULL, + "NUM_DISTINCTS" bigint, + "AVG_COL_LEN" double precision, + "MAX_COL_LEN" bigint, + "NUM_TRUES" bigint, + "NUM_FALSES" bigint, + "LAST_ANALYZED" bigint NOT NULL +); + +-- +-- Name: PART_COL_STATS Type: TABLE; Schema: public; Owner: hiveuser; Tablespace: +-- + +CREATE TABLE "PART_COL_STATS" ( + "CS_ID" bigint NOT NULL, + "DB_NAME" character varying(128) DEFAULT NULL::character varying, + "TABLE_NAME" character varying(128) DEFAULT NULL::character varying, + "PART_NAME" character varying(767) DEFAULT NULL::character varying, + "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying, + "COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying, + "PART_ID" bigint NOT NULL, + "LONG_LOW_VALUE" bigint, + "LONG_HIGH_VALUE" bigint, + "DOUBLE_LOW_VALUE" double precision, + "DOUBLE_HIGH_VALUE" double precision, + "NUM_NULLS" bigint NOT NULL, + "NUM_DISTINCTS" bigint, + 
"AVG_COL_LEN" double precision, + "MAX_COL_LEN" bigint, + "NUM_TRUES" bigint, + "NUM_FALSES" bigint, + "LAST_ANALYZED" bigint NOT NULL +); -- -- Name: BUCKETING_COLS_pkey; Type: CONSTRAINT; Schema: public; Owner: hiveuser; Tablespace: @@ -772,6 +820,16 @@ ALTER TABLE ONLY "SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_pkey" PRIMARY KEY ("SD_ID_OID", "INTEGER_IDX"); -- +-- Name: TAB_COL_STATS_pkey; Type: CONSTRAINT; Schema: public; Owner: hiveuser; Tablespace: +-- +ALTER TABLE ONLY "TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_pkey" PRIMARY KEY("CS_ID"); + +-- +-- Name: PART_COL_STATS_pkey; Type: CONSTRAINT; Schema: public; Owner: hiveuser; Tablespace: +-- +ALTER TABLE ONLY "PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_pkey" PRIMARY KEY("CS_ID"); + +-- -- Name: UNIQUEINDEX; Type: CONSTRAINT; Schema: public; Owner: hiveuser; Tablespace: -- @@ -1035,6 +1093,17 @@ CREATE INDEX "TBL_PRIVS_N49" ON "TBL_PRIVS" USING btree ("TBL_ID"); CREATE INDEX "TYPE_FIELDS_N49" ON "TYPE_FIELDS" USING btree ("TYPE_NAME"); +-- +-- Name: TAB_COL_STATS_N49; Type: INDEX; Schema: public; Owner: hiveuser; Tablespace: +-- + +CREATE INDEX "TAB_COL_STATS_N49" ON "TAB_COL_STATS" USING btree ("TBL_ID"); + +-- +-- Name: PART_COL_STATS_N49; Type: INDEX; Schema: public; Owner: hiveuser; Tablespace: +-- + +CREATE INDEX "PART_COL_STATS_N49" ON "PART_COL_STATS" USING btree ("PART_ID"); ALTER TABLE ONLY "SKEWED_STRING_LIST_VALUES" @@ -1281,6 +1350,16 @@ ALTER TABLE ONLY "TBL_PRIVS" ALTER TABLE ONLY "TYPE_FIELDS" ADD CONSTRAINT "TYPE_FIELDS_TYPE_NAME_fkey" FOREIGN KEY ("TYPE_NAME") REFERENCES "TYPES"("TYPES_ID") DEFERRABLE; +-- +-- Name: TAB_COL_STATS_fkey; Type: FK CONSTRAINT; Schema: public; Owner: hiveuser +-- +ALTER TABLE ONLY "TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_fkey" FOREIGN KEY("TBL_ID") REFERENCES "TBLS"("TBL_ID") DEFERRABLE; + + +-- +-- Name: PART_COL_STATS_fkey; Type: FK CONSTRAINT; Schema: public; Owner: hiveuser +-- +ALTER TABLE ONLY "PART_COL_STATS" ADD CONSTRAINT 
"PART_COL_STATS_fkey" FOREIGN KEY("PART_ID") REFERENCES "PARTITIONS"("PART_ID") DEFERRABLE; -- -- Name: public; Type: ACL; Schema: -; Owner: hiveuser diff --git metastore/scripts/upgrade/postgres/upgrade-0.9.0-to-0.10.0.postgres.sql metastore/scripts/upgrade/postgres/upgrade-0.9.0-to-0.10.0.postgres.sql index d3b6571..f38e46a 100644 --- metastore/scripts/upgrade/postgres/upgrade-0.9.0-to-0.10.0.postgres.sql +++ metastore/scripts/upgrade/postgres/upgrade-0.9.0-to-0.10.0.postgres.sql @@ -1,4 +1,5 @@ SELECT 'Upgrading MetaStore schema from 0.9.0 to 0.10.0'; \i 010-HIVE-3072.postgres.sql; \i 010-HIVE-3649.postgres.sql; +\i 012-HIVE-1362.postgres.sql; SELECT 'Finished upgrading MetaStore schema from 0.9.0 to 0.10.0'; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index ecc69a2..9a2836e 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -4566,15 +4566,15 @@ public class ObjectStore implements RawStore, Configurable { colType.equalsIgnoreCase("timestamp")) { LongColumnStatsData longStats = new LongColumnStatsData(); longStats.setNumNulls(mStatsObj.getNumNulls()); - longStats.setHighValue(mStatsObj.getHighValueAsLong()); - longStats.setLowValue(mStatsObj.getLowValueAsLong()); + longStats.setHighValue(mStatsObj.getLongHighValue()); + longStats.setLowValue(mStatsObj.getLongLowValue()); longStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setLongStats(longStats); } else if (colType.equalsIgnoreCase("double") || colType.equalsIgnoreCase("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); doubleStats.setNumNulls(mStatsObj.getNumNulls()); - doubleStats.setHighValue(mStatsObj.getHighValueAsDouble()); - doubleStats.setLowValue(mStatsObj.getLowValueAsDouble()); + doubleStats.setHighValue(mStatsObj.getDoubleHighValue()); + 
doubleStats.setLowValue(mStatsObj.getDoubleLowValue()); doubleStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setDoubleStats(doubleStats); } @@ -4695,9 +4695,11 @@ public class ObjectStore implements RawStore, Configurable { if (oldStatsObj != null) { oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); - oldStatsObj.setHighValue(mStatsObj.getHighValue()); + oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); + oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); - oldStatsObj.setLowValue(mStatsObj.getLowValue()); + oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); + oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); @@ -4740,15 +4742,15 @@ public class ObjectStore implements RawStore, Configurable { colType.equalsIgnoreCase("timestamp")) { LongColumnStatsData longStats = new LongColumnStatsData(); longStats.setNumNulls(mStatsObj.getNumNulls()); - longStats.setHighValue(mStatsObj.getHighValueAsLong()); - longStats.setLowValue(mStatsObj.getLowValueAsLong()); + longStats.setHighValue(mStatsObj.getLongHighValue()); + longStats.setLowValue(mStatsObj.getLongLowValue()); longStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setLongStats(longStats); } else if (colType.equalsIgnoreCase("double") || colType.equalsIgnoreCase("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); doubleStats.setNumNulls(mStatsObj.getNumNulls()); - doubleStats.setHighValue(mStatsObj.getHighValueAsDouble()); - doubleStats.setLowValue(mStatsObj.getLowValueAsDouble()); + doubleStats.setHighValue(mStatsObj.getDoubleHighValue()); + doubleStats.setLowValue(mStatsObj.getDoubleLowValue()); doubleStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setDoubleStats(doubleStats); } @@ -4817,9 +4819,11 @@ public class ObjectStore 
implements RawStore, Configurable { partName, partVal, colName); if (oldStatsObj != null) { oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); - oldStatsObj.setHighValue(mStatsObj.getHighValue()); + oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); + oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); - oldStatsObj.setLowValue(mStatsObj.getLowValue()); + oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); + oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); diff --git metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java index 500ff29..eb23cf9 100644 --- metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java +++ metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java @@ -21,7 +21,6 @@ */ package org.apache.hadoop.hive.metastore.model; -import java.nio.ByteBuffer; /** @@ -41,8 +40,10 @@ public class MPartitionColumnStatistics { private String colName; private String colType; - private byte[] lowValue; - private byte[] highValue; + private long longLowValue; + private long longHighValue; + private double doubleLowValue; + private double doubleHighValue; private long numNulls; private long numDVs; private double avgColLen; @@ -69,42 +70,6 @@ public class MPartitionColumnStatistics { this.colName = colName; } - public byte[] getLowValue() { - return lowValue; - } - - public long getLowValueAsLong() { - ByteBuffer byteBuf = ByteBuffer.wrap(lowValue); - return byteBuf.getLong(); - } - - public double getLowValueAsDouble() { - ByteBuffer byteBuf = ByteBuffer.wrap(lowValue); - return byteBuf.getDouble(); - } - - public byte[] 
getHighValue() { - return highValue; - } - - public long getHighValueAsLong() { - ByteBuffer byteBuf = ByteBuffer.wrap(highValue); - return byteBuf.getLong(); - } - - public double getHighValueAsDouble() { - ByteBuffer byteBuf = ByteBuffer.wrap(highValue); - return byteBuf.getDouble(); - } - - public void setHighValue(byte[] b) { - this.highValue = b; - } - - public void setLowValue(byte[] b) { - this.lowValue = b; - } - public long getNumNulls() { return numNulls; } @@ -202,19 +167,15 @@ public class MPartitionColumnStatistics { public void setLongStats(long numNulls, long numNDVs, long lowValue, long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; - byte[] bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(lowValue).array(); - this.lowValue = bytes; - bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(highValue).array(); - this.highValue = bytes; + this.longLowValue = lowValue; + this.longHighValue = highValue; } public void setDoubleStats(long numNulls, long numNDVs, double lowValue, double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; - byte[] bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(lowValue).array(); - this.lowValue = bytes; - bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(highValue).array(); - this.highValue = bytes; + this.doubleLowValue = lowValue; + this.doubleHighValue = highValue; } public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) { @@ -229,4 +190,35 @@ public class MPartitionColumnStatistics { this.maxColLen = maxColLen; this.avgColLen = avgColLen; } + public long getLongLowValue() { + return longLowValue; + } + + public void setLongLowValue(long longLowValue) { + this.longLowValue = longLowValue; + } + + public long getLongHighValue() { + return longHighValue; + } + + public void setLongHighValue(long longHighValue) { + this.longHighValue = longHighValue; + } + + public double getDoubleLowValue() { + return doubleLowValue; + } + + public void setDoubleLowValue(double 
doubleLowValue) { + this.doubleLowValue = doubleLowValue; + } + + public double getDoubleHighValue() { + return doubleHighValue; + } + + public void setDoubleHighValue(double doubleHighValue) { + this.doubleHighValue = doubleHighValue; + } } diff --git metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java index 63bf69b..c7ac9b9 100644 --- metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java +++ metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java @@ -21,7 +21,6 @@ */ package org.apache.hadoop.hive.metastore.model; -import java.nio.ByteBuffer; /** @@ -39,8 +38,10 @@ public class MTableColumnStatistics { private String colName; private String colType; - private byte[] lowValue; - private byte[] highValue; + private long longLowValue; + private long longHighValue; + private double doubleLowValue; + private double doubleHighValue; private long numNulls; private long numDVs; private double avgColLen; @@ -83,47 +84,10 @@ public class MTableColumnStatistics { this.colType = colType; } - public byte[] getLowValue() { - return lowValue; - } - - public long getLowValueAsLong() { - ByteBuffer byteBuf = ByteBuffer.wrap(lowValue); - return byteBuf.getLong(); - } - - public double getLowValueAsDouble() { - ByteBuffer byteBuf = ByteBuffer.wrap(lowValue); - return byteBuf.getDouble(); - } - - public byte[] getHighValue() { - return highValue; - } - - public long getHighValueAsLong() { - ByteBuffer byteBuf = ByteBuffer.wrap(highValue); - return byteBuf.getLong(); - } - - public double getHighValueAsDouble() { - ByteBuffer byteBuf = ByteBuffer.wrap(highValue); - return byteBuf.getDouble(); - } - - public void setHighValue(byte[] b) { - this.highValue = b; - } - - public void setLowValue(byte[] b) { - this.lowValue = b; - } - public long getNumNulls() { return numNulls; } - public void 
setNumNulls(long numNulls) { this.numNulls = numNulls; } @@ -193,19 +157,15 @@ public class MTableColumnStatistics { public void setLongStats(long numNulls, long numNDVs, long lowValue, long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; - byte[] bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(lowValue).array(); - this.lowValue = bytes; - bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(highValue).array(); - this.highValue = bytes; + this.longLowValue = lowValue; + this.longHighValue = highValue; } public void setDoubleStats(long numNulls, long numNDVs, double lowValue, double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; - byte[] bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(lowValue).array(); - this.lowValue = bytes; - bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(highValue).array(); - this.highValue = bytes; + this.doubleLowValue = lowValue; + this.doubleHighValue = highValue; } public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) { @@ -220,4 +180,36 @@ public class MTableColumnStatistics { this.maxColLen = maxColLen; this.avgColLen = avgColLen; } + + public long getLongLowValue() { + return longLowValue; + } + + public void setLongLowValue(long longLowValue) { + this.longLowValue = longLowValue; + } + + public long getLongHighValue() { + return longHighValue; + } + + public void setLongHighValue(long longHighValue) { + this.longHighValue = longHighValue; + } + + public double getDoubleLowValue() { + return doubleLowValue; + } + + public void setDoubleLowValue(double doubleLowValue) { + this.doubleLowValue = doubleLowValue; + } + + public double getDoubleHighValue() { + return doubleHighValue; + } + + public void setDoubleHighValue(double doubleHighValue) { + this.doubleHighValue = doubleHighValue; + } } diff --git metastore/src/model/package.jdo metastore/src/model/package.jdo index 5f91f97..b0d0e8d 100644 --- metastore/src/model/package.jdo +++ metastore/src/model/package.jdo @@ 
-775,11 +775,17 @@ - - + + - - + + + + + + + + @@ -815,6 +821,9 @@ + + + @@ -824,11 +833,17 @@ - - + + + + + + + + - - + +