diff --git data/files/v1.txt data/files/v1.txt new file mode 100644 index 0000000..1ce8833 --- /dev/null +++ data/files/v1.txt @@ -0,0 +1,4 @@ +1111fooabc2013-10-10 12:12:12xyzfunbar2013-10-10 12:12:12lmn2013-11-11 12:12:1292222 +2222fooabc2013-10-10 12:12:12xyzfunbar2013-10-10 12:12:12lmn2013-11-11 12:12:1294444 +3333fooabc2013-10-10 12:12:12xyzfunbar2013-10-10 12:12:12lmn2013-11-11 12:12:1296666 +4444fooabc2013-10-10 12:12:12xyzfunbar2013-10-10 12:12:12lmn2013-11-11 12:12:1298888 diff --git data/files/v2.txt data/files/v2.txt new file mode 100644 index 0000000..5e4c94c --- /dev/null +++ data/files/v2.txt @@ -0,0 +1,9 @@ +111199999 +222299999 +222299999 +444499999 +555599999 +666699999 +666699999 +666699999 +888899999 diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java index ec2c088..8fe2262 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java @@ -94,61 +94,35 @@ void addTranslation(ASTNode node, String replacementText) { int tokenStartIndex = node.getTokenStartIndex(); int tokenStopIndex = node.getTokenStopIndex(); - Translation translation = new Translation(); translation.tokenStopIndex = tokenStopIndex; translation.replacementText = replacementText; // Sanity check for overlap with regions already being expanded assert (tokenStopIndex >= tokenStartIndex); - Map.Entry existingEntry; - existingEntry = translations.floorEntry(tokenStartIndex); - boolean prefix = false; - if (existingEntry != null) { - if (existingEntry.getKey().equals(tokenStartIndex)) { - if (existingEntry.getValue().tokenStopIndex == tokenStopIndex) { - if (existingEntry.getValue().replacementText.equals(replacementText)) { - // exact match for existing mapping: somebody is doing something - // redundant, but we'll let it pass - return; - } - } else if (tokenStopIndex > existingEntry.getValue().tokenStopIndex) 
{ - // is existing mapping a prefix for new mapping? if so, that's also - // redundant, but in this case we need to expand it - prefix = replacementText.startsWith( - existingEntry.getValue().replacementText); - assert(prefix); - } else { - // new mapping is a prefix for existing mapping: ignore it - prefix = existingEntry.getValue().replacementText.startsWith( - replacementText); - assert(prefix); - return; - } - } - if (!prefix) { - assert (existingEntry.getValue().tokenStopIndex < tokenStartIndex); - } - } - if (!prefix) { - existingEntry = translations.ceilingEntry(tokenStartIndex); - if (existingEntry != null) { - assert (existingEntry.getKey() > tokenStopIndex); - } - } - // Is existing entry a suffix of the newer entry and a subset of it? - existingEntry = translations.floorEntry(tokenStopIndex); - if (existingEntry != null) { - if (existingEntry.getKey().equals(tokenStopIndex)) { - if (tokenStartIndex < existingEntry.getKey() && - tokenStopIndex == existingEntry.getKey()) { - // Seems newer entry is a super-set of existing entry, remove existing entry - assert (replacementText.endsWith(existingEntry.getValue().replacementText)); - translations.remove(tokenStopIndex); - } + List subsetEntries = new ArrayList(); + // Are the existing entry and the newer entry subsets of one another? 
+ for (Map.Entry existingEntry : + translations.headMap(tokenStopIndex, true).entrySet()) { + // check if the new entry contains the existing + if (existingEntry.getValue().tokenStopIndex <= tokenStopIndex && + existingEntry.getKey() >= tokenStartIndex) { + // collect the existing entry, since the newer entry is a super-set of it + assert (replacementText.contains(existingEntry.getValue().replacementText)); + subsetEntries.add(existingEntry.getKey()); + // check if the existing entry contains the new + } else if (existingEntry.getValue().tokenStopIndex >= tokenStopIndex && + existingEntry.getKey() <= tokenStartIndex) { + assert (existingEntry.getValue().replacementText.contains(replacementText)); + // we don't need to add this new entry since there's already an overlapping one + return; } } + // remove any existing entries that are contained by the new one + for (Integer index : subsetEntries) { + translations.remove(index); + } // It's all good: create a new entry in the map (or update existing one) translations.put(tokenStartIndex, translation); diff --git ql/src/test/queries/clientpositive/view_cast.q ql/src/test/queries/clientpositive/view_cast.q new file mode 100644 index 0000000..b0b078e --- /dev/null +++ ql/src/test/queries/clientpositive/view_cast.q @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS atab; +CREATE TABLE IF NOT EXISTS atab (ks_uid BIGINT, sr_uid STRING, sr_id STRING, tstamp STRING, m_id STRING, act STRING, at_sr_uid STRING, tstamp_type STRING, original_m_id STRING, original_tstamp STRING, registered_flag TINYINT, at_ks_uid BIGINT) PARTITIONED BY (dt STRING,nt STRING); +LOAD DATA LOCAL INPATH '../data/files/v1.txt' INTO TABLE atab PARTITION (dt='20130312', nt='tw'); +LOAD DATA LOCAL INPATH '../data/files/v1.txt' INTO TABLE atab PARTITION (dt='20130311', nt='tw'); + +DROP TABLE IF EXISTS mstab; +CREATE TABLE mstab(ks_uid INT, csc INT) PARTITIONED BY (dt STRING); +LOAD DATA LOCAL INPATH '../data/files/v2.txt' INTO TABLE mstab PARTITION (dt='20130311'); + +DROP VIEW IF 
EXISTS aa_view_tw; +CREATE VIEW aa_view_tw AS SELECT ks_uid, sr_id, act, at_ks_uid, at_sr_uid, from_unixtime(CAST(CAST( tstamp as BIGINT)/1000 AS BIGINT),'yyyyMMdd') AS act_date, from_unixtime(CAST(CAST( original_tstamp AS BIGINT)/1000 AS BIGINT),'yyyyMMdd') AS content_creation_date FROM atab WHERE dt='20130312' AND nt='tw' AND ks_uid != at_ks_uid; + +DROP VIEW IF EXISTS joined_aa_view_tw; +CREATE VIEW joined_aa_view_tw AS SELECT aa.ks_uid, aa.sr_id, aa.act, at_sr_uid, aa.act_date, aa.at_ks_uid, aa.content_creation_date, coalesce( other.ksc, 10.0) AS at_ksc, coalesce( self.ksc , 10.0 ) AS self_ksc FROM aa_view_tw aa LEFT OUTER JOIN ( SELECT ks_uid, csc AS ksc FROM mstab WHERE dt='20130311' ) self ON ( CAST(aa.ks_uid AS BIGINT) = CAST(self.ks_uid AS BIGINT) ) LEFT OUTER JOIN ( SELECT ks_uid, csc AS ksc FROM mstab WHERE dt='20130311' ) other ON ( CAST(aa.at_ks_uid AS BIGINT) = CAST(other.ks_uid AS BIGINT) ); + +SELECT * FROM joined_aa_view_tw; diff --git ql/src/test/results/clientpositive/view_cast.q.out ql/src/test/results/clientpositive/view_cast.q.out new file mode 100644 index 0000000..f2c0afc --- /dev/null +++ ql/src/test/results/clientpositive/view_cast.q.out @@ -0,0 +1,85 @@ +PREHOOK: query: DROP TABLE IF EXISTS atab +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS atab +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE IF NOT EXISTS atab (ks_uid BIGINT, sr_uid STRING, sr_id STRING, tstamp STRING, m_id STRING, act STRING, at_sr_uid STRING, tstamp_type STRING, original_m_id STRING, original_tstamp STRING, registered_flag TINYINT, at_ks_uid BIGINT) PARTITIONED BY (dt STRING,nt STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE IF NOT EXISTS atab (ks_uid BIGINT, sr_uid STRING, sr_id STRING, tstamp STRING, m_id STRING, act STRING, at_sr_uid STRING, tstamp_type STRING, original_m_id STRING, original_tstamp STRING, registered_flag TINYINT, at_ks_uid BIGINT) PARTITIONED BY (dt STRING,nt STRING) +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: default@atab +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/v1.txt' INTO TABLE atab PARTITION (dt='20130312', nt='tw') +PREHOOK: type: LOAD +PREHOOK: Output: default@atab +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/v1.txt' INTO TABLE atab PARTITION (dt='20130312', nt='tw') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@atab +POSTHOOK: Output: default@atab@dt=20130312/nt=tw +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/v1.txt' INTO TABLE atab PARTITION (dt='20130311', nt='tw') +PREHOOK: type: LOAD +PREHOOK: Output: default@atab +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/v1.txt' INTO TABLE atab PARTITION (dt='20130311', nt='tw') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@atab +POSTHOOK: Output: default@atab@dt=20130311/nt=tw +PREHOOK: query: DROP TABLE IF EXISTS mstab +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS mstab +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE mstab(ks_uid INT, csc INT) PARTITIONED BY (dt STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE mstab(ks_uid INT, csc INT) PARTITIONED BY (dt STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@mstab +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/v2.txt' INTO TABLE mstab PARTITION (dt='20130311') +PREHOOK: type: LOAD +PREHOOK: Output: default@mstab +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/v2.txt' INTO TABLE mstab PARTITION (dt='20130311') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@mstab +POSTHOOK: Output: default@mstab@dt=20130311 +PREHOOK: query: DROP VIEW IF EXISTS aa_view_tw +PREHOOK: type: DROPVIEW +POSTHOOK: query: DROP VIEW IF EXISTS aa_view_tw +POSTHOOK: type: DROPVIEW +PREHOOK: query: CREATE VIEW aa_view_tw AS SELECT ks_uid, sr_id, act, at_ks_uid, at_sr_uid, from_unixtime(CAST(CAST( tstamp as BIGINT)/1000 AS BIGINT),'yyyyMMdd') AS act_date, from_unixtime(CAST(CAST( original_tstamp AS BIGINT)/1000 AS BIGINT),'yyyyMMdd') AS content_creation_date 
FROM atab WHERE dt='20130312' AND nt='tw' AND ks_uid != at_ks_uid +PREHOOK: type: CREATEVIEW +POSTHOOK: query: CREATE VIEW aa_view_tw AS SELECT ks_uid, sr_id, act, at_ks_uid, at_sr_uid, from_unixtime(CAST(CAST( tstamp as BIGINT)/1000 AS BIGINT),'yyyyMMdd') AS act_date, from_unixtime(CAST(CAST( original_tstamp AS BIGINT)/1000 AS BIGINT),'yyyyMMdd') AS content_creation_date FROM atab WHERE dt='20130312' AND nt='tw' AND ks_uid != at_ks_uid +POSTHOOK: type: CREATEVIEW +POSTHOOK: Output: default@aa_view_tw +PREHOOK: query: DROP VIEW IF EXISTS joined_aa_view_tw +PREHOOK: type: DROPVIEW +POSTHOOK: query: DROP VIEW IF EXISTS joined_aa_view_tw +POSTHOOK: type: DROPVIEW +PREHOOK: query: CREATE VIEW joined_aa_view_tw AS SELECT aa.ks_uid, aa.sr_id, aa.act, at_sr_uid, aa.act_date, aa.at_ks_uid, aa.content_creation_date, coalesce( other.ksc, 10.0) AS at_ksc, coalesce( self.ksc , 10.0 ) AS self_ksc FROM aa_view_tw aa LEFT OUTER JOIN ( SELECT ks_uid, csc AS ksc FROM mstab WHERE dt='20130311' ) self ON ( CAST(aa.ks_uid AS BIGINT) = CAST(self.ks_uid AS BIGINT) ) LEFT OUTER JOIN ( SELECT ks_uid, csc AS ksc FROM mstab WHERE dt='20130311' ) other ON ( CAST(aa.at_ks_uid AS BIGINT) = CAST(other.ks_uid AS BIGINT) ) +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@aa_view_tw +POSTHOOK: query: CREATE VIEW joined_aa_view_tw AS SELECT aa.ks_uid, aa.sr_id, aa.act, at_sr_uid, aa.act_date, aa.at_ks_uid, aa.content_creation_date, coalesce( other.ksc, 10.0) AS at_ksc, coalesce( self.ksc , 10.0 ) AS self_ksc FROM aa_view_tw aa LEFT OUTER JOIN ( SELECT ks_uid, csc AS ksc FROM mstab WHERE dt='20130311' ) self ON ( CAST(aa.ks_uid AS BIGINT) = CAST(self.ks_uid AS BIGINT) ) LEFT OUTER JOIN ( SELECT ks_uid, csc AS ksc FROM mstab WHERE dt='20130311' ) other ON ( CAST(aa.at_ks_uid AS BIGINT) = CAST(other.ks_uid AS BIGINT) ) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@aa_view_tw +POSTHOOK: Output: default@joined_aa_view_tw +PREHOOK: query: SELECT * FROM joined_aa_view_tw +PREHOOK: type: QUERY 
+PREHOOK: Input: default@aa_view_tw +PREHOOK: Input: default@atab +PREHOOK: Input: default@atab@dt=20130312/nt=tw +PREHOOK: Input: default@joined_aa_view_tw +PREHOOK: Input: default@mstab +PREHOOK: Input: default@mstab@dt=20130311 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM joined_aa_view_tw +POSTHOOK: type: QUERY +POSTHOOK: Input: default@aa_view_tw +POSTHOOK: Input: default@atab +POSTHOOK: Input: default@atab@dt=20130312/nt=tw +POSTHOOK: Input: default@joined_aa_view_tw +POSTHOOK: Input: default@mstab +POSTHOOK: Input: default@mstab@dt=20130311 +#### A masked pattern was here #### +1111 abc fun bar NULL 2222 NULL 99999.0 99999.0 +1111 abc fun bar NULL 2222 NULL 99999.0 99999.0 +2222 abc fun bar NULL 4444 NULL 99999.0 99999.0 +2222 abc fun bar NULL 4444 NULL 99999.0 99999.0 +3333 abc fun bar NULL 6666 NULL 99999.0 10.0 +3333 abc fun bar NULL 6666 NULL 99999.0 10.0 +3333 abc fun bar NULL 6666 NULL 99999.0 10.0 +4444 abc fun bar NULL 8888 NULL 99999.0 99999.0