diff --git a/hbase-spark/pom.xml b/hbase-spark/pom.xml index 7cd78f0..9d46a3c 100644 --- a/hbase-spark/pom.xml +++ b/hbase-spark/pom.xml @@ -37,9 +37,9 @@ Apache HBase - Spark - 1.6.0 - 2.10.4 - 2.10 + 2.0.0 + 2.11.8 + 2.11 true ${project.basedir}/.. 1.7.6 @@ -693,6 +693,36 @@ + + spark-1.6 + + + spark.profile1.6 + + + + + org.apache.hbase + hbase-spark1.6-compat + ${project.version} + + + + + spark-2.0 + + + !spark.profile + + + + + org.apache.hbase + hbase-spark2.0-compat + ${project.version} + + + skipSparkTests diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala index 1a3c370..c4a5ba9 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala @@ -23,6 +23,7 @@ import java.util.concurrent.ConcurrentLinkedQueue import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.io.ImmutableBytesWritable import org.apache.hadoop.hbase.mapred.TableOutputFormat +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources._ import org.apache.hadoop.hbase.types._ import org.apache.hadoop.hbase.util.{Bytes, PositionedByteRange, SimplePositionedMutableByteRange} @@ -32,7 +33,6 @@ import org.apache.hadoop.hbase.HColumnDescriptor import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.CellUtil import org.apache.hadoop.mapred.JobConf -import org.apache.spark.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.datasources.hbase.{Utils, Field, HBaseTableCatalog} import org.apache.spark.sql.{DataFrame, SaveMode, Row, SQLContext} diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala index a9b38ba..b57efb0 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.io.compress.Compression.Algorithm import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding import org.apache.hadoop.hbase.io.hfile.{HFile, CacheConfig, HFileContextBuilder, HFileWriterImpl} import org.apache.hadoop.hbase.regionserver.{HStore, StoreFile, StoreFileWriter, BloomType} +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.mapred.JobConf import org.apache.spark.broadcast.Broadcast @@ -38,7 +39,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.client._ import scala.reflect.ClassTag -import org.apache.spark.{Logging, SerializableWritable, SparkContext} +import org.apache.spark.{SerializableWritable, SparkContext} import org.apache.hadoop.hbase.mapreduce.{TableMapReduceUtil, TableInputFormat, IdentityTableMapper} import org.apache.hadoop.hbase.io.ImmutableBytesWritable diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala index 7deb5b8..cb7cef8 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala @@ -25,6 +25,8 @@ import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.java.function.{FlatMapFunction, Function, VoidFunction} import org.apache.spark.streaming.api.java.JavaDStream +import java.lang.Iterable + import scala.collection.JavaConversions._ import scala.reflect.ClassTag @@ -103,10 +105,14 @@ class JavaHBaseContext(@transient jsc: JavaSparkContext, f: FlatMapFunction[(java.util.Iterator[T], Connection), R]): JavaRDD[R] = { - def fn = (it: Iterator[T], conn: Connection) => - asScalaIterator( - f.call((asJavaIterator(it), conn)).iterator() - ) + def fn = (it: Iterator[T], conn: Connection) => { + val iter = f.call(asJavaIterator(it), conn) + if (iter.isInstanceOf[Iterable[R]]) { + asScalaIterator(iter.asInstanceOf[Iterable[R]].iterator()) + } else { + asScalaIterator(iter) + } + } JavaRDD.fromRDD(hbaseContext.mapPartitions(javaRdd.rdd, (iterator: Iterator[T], connection: Connection) => diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala index 5b45ef9..f045833 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala @@ -24,7 +24,7 @@ import org.apache.hadoop.hbase.spark._ import org.apache.hadoop.hbase.spark.hbase._ import org.apache.hadoop.hbase.spark.datasources.HBaseResources._ import org.apache.spark.sql.datasources.hbase.Field -import org.apache.spark.{SparkEnv, TaskContext, Logging, Partition} +import org.apache.spark.{SparkEnv, TaskContext, Partition} import org.apache.spark.rdd.RDD import scala.collection.mutable @@ -33,7 +33,8 @@ class HBaseTableScanRDD(relation: HBaseRelation, val hbaseContext: HBaseContext, @transient val filter: Option[SparkSQLPushDownFilter] = None, val columns: Seq[Field] = Seq.empty - )extends RDD[Result](relation.sqlContext.sparkContext, Nil) with Logging { + ) extends RDD[Result](relation.sqlContext.sparkContext, Nil) + { private def sparkConf = SparkEnv.get.conf @transient var ranges = Seq.empty[Range] @transient var points = Seq.empty[Array[Byte]] diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala index 851fb66..8b67534 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala @@ -17,9 +17,9 @@ package org.apache.hadoop.hbase.spark.datasources +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging import org.apache.spark.sql.types._ /** @@ -102,4 +102,4 @@ object JavaBytesEncoder extends Enumeration with Logging{ new NaiveEncoder() } } -} \ No newline at end of file +} diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala index 3137717..99bc2a9 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala @@ -17,9 +17,9 @@ package org.apache.hadoop.hbase.spark.datasources */ import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.hbase._ import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String diff --git a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala deleted file mode 100644 index 1e56a3d..0000000 --- a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.datasources.hbase - -import org.apache.spark.sql.catalyst.SqlLexical -import org.apache.spark.sql.catalyst.util.DataTypeParser -import org.apache.spark.sql.types.DataType - -object DataTypeParserWrapper { - lazy val dataTypeParser = new DataTypeParser { - override val lexical = new SqlLexical - } - - def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString) -} diff --git a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/HBaseTableCatalog.scala b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/HBaseTableCatalog.scala index c2d611f..5eaedbd 100644 --- a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/HBaseTableCatalog.scala +++ b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/HBaseTableCatalog.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.datasources.hbase import org.apache.avro.Schema +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.SchemaConverters import org.apache.hadoop.hbase.spark.datasources._ import org.apache.hadoop.hbase.spark.hbase._ import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging -import org.apache.spark.sql.catalyst.util.DataTypeParser import org.apache.spark.sql.types._ import org.json4s.jackson.JsonMethods._ @@ -77,7 +76,7 @@ case class Field( } val dt = { - sType.map(DataTypeParser.parse(_)).getOrElse{ + sType.map(DataTypeParserWrapper.parse(_)).getOrElse{ schema.map{ x=> SchemaConverters.toSqlType(x).dataType }.get diff --git a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/Utils.scala b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/Utils.scala index 73d054d..7a246e5 100644 --- a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/Utils.scala +++ b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/Utils.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.datasources.hbase import org.apache.hadoop.hbase.spark.AvroSerdes import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.sql.execution.SparkSqlSerializer +//import org.apache.spark.sql.execution.SparkSqlSerializer import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -57,8 +57,8 @@ object Utils { val newArray = new Array[Byte](length) System.arraycopy(src, offset, newArray, 0, length) newArray - // TODO: add more data type support - case _ => SparkSqlSerializer.deserialize[Any](src) + // TODO: SparkSqlSerializer.deserialize[Any](src) + case _ => throw new Exception(s"unsupported data type ${f.dt}") } } } diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala index 795ce6d..05b9b38 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala @@ -21,10 +21,11 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hbase.client.{Get, ConnectionFactory} import org.apache.hadoop.hbase.io.hfile.{CacheConfig, HFile} import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.{HConstants, CellUtil, HBaseTestingUtility, TableName} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ -import org.apache.spark.{SparkContext, Logging} +import org.apache.spark.SparkContext import org.junit.rules.TemporaryFolder import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala index 0f8baed..f8b053e 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala @@ -20,13 +20,14 @@ package org.apache.hadoop.hbase.spark import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.hadoop.hbase.client.{ConnectionFactory, Put} +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName} import org.apache.spark.sql.datasources.hbase.HBaseTableCatalog import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, SQLContext} -import org.apache.spark.{Logging, SparkConf, SparkContext} +import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} case class HBaseRecord( @@ -377,8 +378,8 @@ BeforeAndAfterEach with BeforeAndAfterAll with Logging { assert(results.length == 2) - assert(executionRules.dynamicLogicExpression.toExpressionString. - equals("( KEY_FIELD <= 0 AND KEY_FIELD >= 1 )")) + val expr = executionRules.dynamicLogicExpression.toExpressionString + assert(expr.equals("( ( KEY_FIELD isNotNull AND KEY_FIELD <= 0 ) AND KEY_FIELD >= 1 )"), expr) assert(executionRules.rowKeyFilter.points.size == 0) assert(executionRules.rowKeyFilter.ranges.size == 1) @@ -653,8 +654,9 @@ BeforeAndAfterEach with BeforeAndAfterAll with Logging { assert(localResult(0).getInt(2) == 8) val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll() - assert(executionRules.dynamicLogicExpression.toExpressionString. - equals("( I_FIELD > 0 AND I_FIELD < 1 )")) + val expr = executionRules.dynamicLogicExpression.toExpressionString + logInfo(expr) + assert(expr.equals("( ( I_FIELD isNotNull AND I_FIELD > 0 ) AND I_FIELD < 1 )"), expr) } diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala index b9c15ce..f3abc62 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala @@ -19,9 +19,9 @@ package org.apache.hadoop.hbase.spark import java.util +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, JavaBytesEncoder} import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging import org.apache.spark.sql.types._ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala index 49e2f6c..f9e24c8 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala @@ -17,9 +17,9 @@ package org.apache.hadoop.hbase.spark +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.{DoubleSerDes, SerDes} import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging import org.apache.spark.sql.datasources.hbase.{DataTypeParserWrapper, HBaseTableCatalog} import org.apache.spark.sql.types._ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala index b27cfc7..011ba32 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala @@ -19,7 +19,8 @@ package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{ CellUtil, TableName, HBaseTestingUtility} -import org.apache.spark.{SparkException, Logging, SparkContext} +import org.apache.hadoop.hbase.spark.Logging +import org.apache.spark.{SparkException, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class HBaseContextSuite extends FunSuite with diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala index 3b14c35..2ad640d 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala @@ -17,11 +17,12 @@ package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.client._ +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseTestingUtility} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Milliseconds, StreamingContext} -import org.apache.spark.{SparkContext, Logging} +import org.apache.spark.SparkContext import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} @@ -139,4 +140,4 @@ BeforeAndAfterEach with BeforeAndAfterAll with Logging { } } -} \ No newline at end of file +} diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala index 89148c3..17921c3 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala @@ -17,10 +17,11 @@ package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.client._ +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseTestingUtility} import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ -import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.SparkContext import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} import scala.collection.mutable diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala index d33ced9..d573279 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala @@ -18,10 +18,11 @@ package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.{TableName, HBaseTestingUtility} import org.apache.spark.sql.datasources.hbase.HBaseTableCatalog import org.apache.spark.sql.{DataFrame, SQLContext} -import org.apache.spark.{SparkConf, SparkContext, Logging} +import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} case class FilterRangeRecord( diff --git a/pom.xml b/pom.xml index ae7a80a..4ff28d1 100644 --- a/pom.xml +++ b/pom.xml @@ -68,6 +68,8 @@ hbase-protocol hbase-client hbase-hadoop-compat + hbase-spark2.0-compat + hbase-spark1.6-compat hbase-common hbase-procedure hbase-it @@ -1339,6 +1341,16 @@ org.apache.hbase + hbase-spark1.6-compat + ${project.version} + + + org.apache.hbase + hbase-spark2.0-compat + ${project.version} + + + org.apache.hbase hbase-hadoop-compat ${project.version} test-jar diff --git a/hbase-spark1.6-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala b/hbase-spark1.6-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala new file mode 100644 index 0000000..1e56a3d --- /dev/null +++ b/hbase-spark1.6-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.datasources.hbase + +import org.apache.spark.sql.catalyst.SqlLexical +import org.apache.spark.sql.catalyst.util.DataTypeParser +import org.apache.spark.sql.types.DataType + +object DataTypeParserWrapper { + lazy val dataTypeParser = new DataTypeParser { + override val lexical = new SqlLexical + } + + def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString) +} diff --git a/hbase-spark1.6-compat/pom.xml b/hbase-spark1.6-compat/pom.xml new file mode 100644 index 0000000..39b7738 --- /dev/null +++ b/hbase-spark1.6-compat/pom.xml @@ -0,0 +1,210 @@ + + + + 4.0.0 + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. + + + hbase-spark1.6-compat + Apache HBase - Spark 1.6 Compatibility + + Interfaces to be implemented in order to smooth + over spark version differences + + + 1.6.0 + 2.11.8 + 2.11 + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + + maven-assembly-plugin + ${maven.assembly.version} + + true + + + + maven-surefire-plugin + + + + secondPartTestsExecution + test + + test + + + true + + + + + + + org.apache.maven.plugins + maven-source-plugin + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + [3.2,) + + compile + + + + + + + + + + + + + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.hbase + hbase-annotations + test-jar + test + + + + commons-logging + commons-logging + + + org.apache.commons + commons-math + + + + + + + skipSparkCompatTests + + + skipSparkCompatTests + + + + true + + + + + diff --git a/hbase-spark2.0-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala b/hbase-spark2.0-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala new file mode 100644 index 0000000..50e281e --- /dev/null +++ b/hbase-spark2.0-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.datasources.hbase + +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.types.DataType + +/* + * This class is to isolate type incompatibilities between Spark 1.6 and Spark 2.0 + */ +object DataTypeParserWrapper { + /* + lazy val dataTypeParser = new DataTypeParser { + override val lexical = new SqlLexical + } + */ + + def parse(dataTypeString: String): DataType = CatalystSqlParser.parseDataType(dataTypeString) +} diff --git a/hbase-spark2.0-compat/pom.xml b/hbase-spark2.0-compat/pom.xml new file mode 100644 index 0000000..2e96fe1 --- /dev/null +++ b/hbase-spark2.0-compat/pom.xml @@ -0,0 +1,210 @@ + + + + 4.0.0 + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. + + + hbase-spark2.0-compat + Apache HBase - Spark Compatibility + + Interfaces to be implemented in order to smooth + over spark version differences + + + 2.0.0 + 2.11.8 + 2.11 + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + + maven-assembly-plugin + ${maven.assembly.version} + + true + + + + maven-surefire-plugin + + + + secondPartTestsExecution + test + + test + + + true + + + + + + + org.apache.maven.plugins + maven-source-plugin + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + [3.2,) + + compile + + + + + + + + + + + + + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.hbase + hbase-annotations + test-jar + test + + + + commons-logging + commons-logging + + + org.apache.commons + commons-math + + + + + + + skipSparkCompatTests + + + skipSparkCompatTests + + + + true + + + + + diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala new file mode 100644 index 0000000..d6b142f --- /dev/null +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.spark + +import org.apache.log4j.{Level, LogManager, PropertyConfigurator} +import org.slf4j.{Logger, LoggerFactory} +import org.slf4j.impl.StaticLoggerBinder + +/** + * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows + * logging messages at different levels using methods that only evaluate parameters lazily if the + * log level is enabled. + * Logging is private in Spark 2.0 + * This is to isolate incompatibilties across Spark releases. + */ +trait Logging { + + // Make the log field transient so that objects with Logging can + // be serialized and used on another machine + @transient private var log_ : Logger = null + + // Method to get the logger name for this object + protected def logName = { + // Ignore trailing $'s in the class names for Scala objects + this.getClass.getName.stripSuffix("$") + } + + // Method to get or create the logger for this object + protected def log: Logger = { + if (log_ == null) { + initializeLogIfNecessary(false) + log_ = LoggerFactory.getLogger(logName) + } + log_ + } + + // Log methods that take only a String + protected def logInfo(msg: => String) { + if (log.isInfoEnabled) log.info(msg) + } + + protected def logDebug(msg: => String) { + if (log.isDebugEnabled) log.debug(msg) + } + + protected def logTrace(msg: => String) { + if (log.isTraceEnabled) log.trace(msg) + } + + protected def logWarning(msg: => String) { + if (log.isWarnEnabled) log.warn(msg) + } + + protected def logError(msg: => String) { + if (log.isErrorEnabled) log.error(msg) + } + + // Log methods that take Throwables (Exceptions/Errors) too + protected def logInfo(msg: => String, throwable: Throwable) { + if (log.isInfoEnabled) log.info(msg, throwable) + } + + protected def logDebug(msg: => String, throwable: Throwable) { + if (log.isDebugEnabled) log.debug(msg, throwable) + } + + protected def logTrace(msg: => String, throwable: Throwable) { + if (log.isTraceEnabled) log.trace(msg, throwable) + } + + protected def logWarning(msg: => String, throwable: Throwable) { + if (log.isWarnEnabled) log.warn(msg, throwable) + } + + protected def logError(msg: => String, throwable: Throwable) { + if (log.isErrorEnabled) log.error(msg, throwable) + } + + protected def isTraceEnabled(): Boolean = { + log.isTraceEnabled + } + + protected def initializeLogIfNecessary(isInterpreter: Boolean): Unit = { + if (!Logging.initialized) { + Logging.initLock.synchronized { + if (!Logging.initialized) { + initializeLogging(isInterpreter) + } + } + } + } + + private def initializeLogging(isInterpreter: Boolean): Unit = { + // Don't use a logger in here, as this is itself occurring during initialization of a logger + // If Log4j 1.2 is being used, but is not initialized, load a default properties file + val binderClass = StaticLoggerBinder.getSingleton.getLoggerFactoryClassStr + // This distinguishes the log4j 1.2 binding, currently + // org.slf4j.impl.Log4jLoggerFactory, from the log4j 2.0 binding, currently + // org.apache.logging.slf4j.Log4jLoggerFactory + val usingLog4j12 = "org.slf4j.impl.Log4jLoggerFactory".equals(binderClass) + if (usingLog4j12) { + val log4j12Initialized = LogManager.getRootLogger.getAllAppenders.hasMoreElements + } + Logging.initialized = true + + // Force a call into slf4j to initialize it. Avoids this happening from multiple threads + // and triggering this: http://mailman.qos.ch/pipermail/slf4j-dev/2010-April/002956.html + log + } +} + +private object Logging { + @volatile private var initialized = false + val initLock = new Object() +}