diff --git a/data/files/table1.avsc b/data/files/table1.avsc
new file mode 100644
index 0000000000000000000000000000000000000000..2c96ad20e7794d15c3aeab4a85fb75480926ad00
--- /dev/null
+++ b/data/files/table1.avsc
@@ -0,0 +1,25 @@
+{
+  "type" : "record",
+  "name" : "table1",
+  "doc" : "Sqoop import of table1",
+  "fields" : [ {
+    "name" : "col1",
+    "type" : [ "null", "string" ],
+    "default" : null,
+    "columnName" : "col1",
+    "sqlType" : "12"
+  }, {
+    "name" : "col2",
+    "type" : [ "null", "long" ],
+    "default" : null,
+    "columnName" : "col2",
+    "sqlType" : "13"
+  }, {
+    "name" : "col3",
+    "type" : [ "null", "string" ],
+    "default" : null,
+    "columnName" : "col3",
+    "sqlType" : "12"
+  } ],
+  "tableName" : "table1"
+}
\ No newline at end of file
diff --git a/data/files/table1_1.avsc b/data/files/table1_1.avsc
new file mode 100644
index 0000000000000000000000000000000000000000..1a7e518c105e6cd27843af0cd5820161ee9350f3
--- /dev/null
+++ b/data/files/table1_1.avsc
@@ -0,0 +1,19 @@
+{
+  "type" : "record",
+  "name" : "table1_1",
+  "doc" : "Sqoop import of table1_1",
+  "fields" : [ {
+    "name" : "col1",
+    "type" : [ "null", "long" ],
+    "default" : null,
+    "columnName" : "col1",
+    "sqlType" : "13"
+  }, {
+    "name" : "col2",
+    "type" : [ "null", "string" ],
+    "default" : null,
+    "columnName" : "col2",
+    "sqlType" : "12"
+  }],
+  "tableName" : "table1_1"
+}
\ No newline at end of file
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
index 4fccfc169cf0431f16ad6fbc5e57c32a0090f4a4..68138c8be7152d1965d5abf4c75c75392800f700 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
@@ -32,6 +32,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -146,10 +147,10 @@ private Schema getSchema(JobConf job, FileSplit split) throws AvroSerdeException
 
   private boolean pathIsInPartition(Path split, Path partitionPath) {
     boolean schemeless = split.toUri().getScheme() == null;
     if (schemeless) {
-      String schemelessPartitionPath = partitionPath.toUri().getPath();
-      return split.toString().startsWith(schemelessPartitionPath);
+      Path pathNoSchema = Path.getPathWithoutSchemeAndAuthority(partitionPath);
+      return FileUtils.isPathWithinSubtree(split,pathNoSchema);
     } else {
-      return split.toString().startsWith(partitionPath.toString());
+      return FileUtils.isPathWithinSubtree(split,partitionPath);
     }
   }
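The Java hunk above is the actual fix: pathIsInPartition() used to decide ownership of a split with a raw string prefix test, so a split under one table's directory could be claimed by any table whose directory string happens to be a prefix of it, and the reader would then decode the split with that table's Avro schema. FileUtils.isPathWithinSubtree() matches on whole path components instead; the schemeless branch only strips the scheme and authority from the partition path first. A minimal before/after sketch (the warehouse paths and the demo class are illustrative only, not part of the patch):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.FileUtils;

    public class PrefixVsSubtreeDemo {
      public static void main(String[] args) {
        Path split = new Path("/user/hive/warehouse/table1_1/000000_0");
        Path otherTable = new Path("/user/hive/warehouse/table1");

        // Old check: a plain prefix match claims table1_1's file for table1.
        System.out.println(split.toString().startsWith(otherTable.toString())); // true (wrong)

        // New check: containment is decided on path-component boundaries.
        System.out.println(FileUtils.isPathWithinSubtree(split, otherTable));   // false (correct)
      }
    }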
diff --git a/ql/src/test/queries/clientpositive/avrotblsjoin.q b/ql/src/test/queries/clientpositive/avrotblsjoin.q
new file mode 100644
index 0000000000000000000000000000000000000000..8c1f08478dedc059ef3af12df374b8016cc95e9d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avrotblsjoin.q
@@ -0,0 +1,28 @@
+drop table if exists table1;
+drop table if exists table1_1;
+
+dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/;
+dfs -cp ${system:hive.root}data/files/table1_1.avsc ${system:test.tmp.dir}/;
+
+create table table1
+  ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+  STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+  OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+  TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc');
+create table table1_1
+  ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+  STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+  OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+  TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1_1.avsc');
+insert into table1 values ("1", "2", "3");
+insert into table1_1 values (1, "2");
+set hive.auto.convert.join=false;
+set hive.strict.checks.type.safety=false;
+set hive.mapred.mode=nonstrict;
+select table1.col1, table1_1.* from table1 join table1_1 on table1.col1=table1_1.col1 where table1_1.col1="1";
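The qtest reproduces the collision end to end: table1's warehouse directory is a string prefix of table1_1's, and the two Avro schemas deliberately disagree on col1 (string in table1 vs. long in table1_1), so decoding a split with the wrong table's schema is observable. hive.auto.convert.join=false forces a shuffle join, so one map stage reads both tables and must match each split against several partition paths, and the relaxed strictness settings let the string-to-bigint comparison on the join key compile (hence the precision warning in the expected output below). For illustration, a simplified re-implementation of the component-wise containment idea behind FileUtils.isPathWithinSubtree (a sketch, not the actual Hive code):

    import org.apache.hadoop.fs.Path;

    public class WithinSubtreeSketch {
      // Climb from the candidate path until its depth matches the subtree
      // root, then require exact equality; "/a/b_1" never matches "/a/b".
      static boolean withinSubtree(Path path, Path subtree) {
        int subtreeDepth = subtree.depth();
        while (path != null) {
          if (path.depth() < subtreeDepth) {
            return false;                 // candidate is shallower than the root
          }
          if (path.depth() == subtreeDepth) {
            return subtree.equals(path);
          }
          path = path.getParent();        // drop one trailing component
        }
        return false;
      }

      public static void main(String[] args) {
        Path split = new Path("/user/hive/warehouse/table1_1/000000_0");
        System.out.println(withinSubtree(split, new Path("/user/hive/warehouse/table1")));   // false
        System.out.println(withinSubtree(split, new Path("/user/hive/warehouse/table1_1"))); // true
      }
    }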
diff --git a/ql/src/test/results/clientpositive/avrotblsjoin.q.out b/ql/src/test/results/clientpositive/avrotblsjoin.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..d0170a36674c429f2def89b36623d34811e08140
--- /dev/null
+++ b/ql/src/test/results/clientpositive/avrotblsjoin.q.out
@@ -0,0 +1,82 @@
+PREHOOK: query: drop table if exists table1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists table1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists table1_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists table1_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table table1
+  ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+  STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+  OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: create table table1
+  ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+  STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+  OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: create table table1_1
+  ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+  STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+  OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1_1
+POSTHOOK: query: create table table1_1
+  ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+  STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+  OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1_1
+PREHOOK: query: insert into table1 values ("1", "2", "3")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table1 values ("1", "2", "3")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.col1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.col2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.col3 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into table1_1 values (1, "2")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@table1_1
+POSTHOOK: query: insert into table1_1 values (1, "2")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@table1_1
+POSTHOOK: Lineage: table1_1.col1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1_1.col2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+WARNING: Comparing a bigint and a string may result in a loss of precision.
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: select table1.col1, table1_1.* from table1 join table1_1 on table1.col1=table1_1.col1 where table1_1.col1="1"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Input: default@table1_1
+#### A masked pattern was here ####
+POSTHOOK: query: select table1.col1, table1_1.* from table1 join table1_1 on table1.col1=table1_1.col1 where table1_1.col1="1"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Input: default@table1_1
+#### A masked pattern was here ####
+1	1	2