diff --git a/hbase-assembly/pom.xml b/hbase-assembly/pom.xml index b9d8dcc..109f395 100644 --- a/hbase-assembly/pom.xml +++ b/hbase-assembly/pom.xml @@ -214,7 +214,42 @@ org.apache.hbase - hbase-spark + hbase-spark1.6-compat + ${project.version} + + + org.apache.hbase + hbase-spark1.6-compat_2.10 + ${project.version} + + + org.apache.hbase + hbase-spark2.0-compat + ${project.version} + + + org.apache.hbase + hbase-spark2.0-compat_2.10 + ${project.version} + + + org.apache.hbase + hbase-spark-${spark.version}_2.10 + ${project.version} + + + org.apache.hbase + hbase-spark-${spark.version}_2.11 + ${project.version} + + + org.apache.hbase + hbase-spark-1.6.1_2.10 + ${project.version} + + + org.apache.hbase + hbase-spark-1.6.1_2.11 ${project.version} diff --git a/hbase-spark/pom.xml b/hbase-spark/pom.xml index 035dfcc..e195b9c 100644 --- a/hbase-spark/pom.xml +++ b/hbase-spark/pom.xml @@ -33,13 +33,12 @@ 2.0.0-SNAPSHOT .. - hbase-spark - Apache HBase - Spark + hbase-spark-${spark.version}_2.11 + Apache HBase - Spark 2.0 with Scala 2.11 - 1.6.0 - 2.10.4 - 2.10 + 2.11.8 + 2.11 true ${project.basedir}/.. 1.7.6 @@ -686,6 +685,24 @@ + + spark-2.0 + + + !spark.profile + + + + 2.0.2 + + + + org.apache.hbase + hbase-spark2.0-compat + ${project.version} + + + skipSparkTests diff --git a/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java b/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java index 3e90fe1..3f3f550 100644 --- a/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java +++ b/hbase-spark/src/main/java/org/apache/hadoop/hbase/spark/SparkSQLPushDownFilter.java @@ -22,6 +22,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.FilterBase; +import org.apache.hadoop.hbase.filter.Filter.ReturnCode; import org.apache.hadoop.hbase.spark.datasources.BytesEncoder; import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder; import org.apache.hadoop.hbase.spark.protobuf.generated.SparkFilterProtos; diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala index b04abd8..373e97a 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/DefaultSource.scala @@ -23,6 +23,7 @@ import java.util.concurrent.ConcurrentLinkedQueue import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.io.ImmutableBytesWritable import org.apache.hadoop.hbase.mapred.TableOutputFormat +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources._ import org.apache.hadoop.hbase.types._ import org.apache.hadoop.hbase.util.{Bytes, PositionedByteRange, SimplePositionedMutableByteRange} @@ -32,7 +33,6 @@ import org.apache.hadoop.hbase.HColumnDescriptor import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.CellUtil import org.apache.hadoop.mapred.JobConf -import org.apache.spark.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.datasources.hbase.{Utils, Field, HBaseTableCatalog} import org.apache.spark.sql.{DataFrame, SaveMode, Row, SQLContext} diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCache.scala 
b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCache.scala index fb5833e..002747b 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCache.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCache.scala @@ -23,9 +23,9 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.client.{Admin, Connection, ConnectionFactory, RegionLocator, Table} import org.apache.hadoop.hbase.ipc.RpcControllerFactory import org.apache.hadoop.hbase.security.{User, UserProvider} +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf import org.apache.hadoop.hbase.{HConstants, TableName} -import org.apache.spark.Logging import scala.collection.mutable diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala index aeffecb..80b741d 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/HBaseContext.scala @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.io.compress.Compression.Algorithm import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding import org.apache.hadoop.hbase.io.hfile.{HFile, CacheConfig, HFileContextBuilder, HFileWriterImpl} import org.apache.hadoop.hbase.regionserver.{HStore, StoreFile, StoreFileWriter, BloomType} +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.mapred.JobConf import org.apache.spark.broadcast.Broadcast @@ -38,7 +39,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.client._ import scala.reflect.ClassTag -import org.apache.spark.{Logging, SerializableWritable, SparkContext} +import org.apache.spark.{SerializableWritable, SparkContext} import org.apache.hadoop.hbase.mapreduce.{TableMapReduceUtil, TableInputFormat, IdentityTableMapper} import org.apache.hadoop.hbase.io.ImmutableBytesWritable diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala index a99e0e3..fe837c6 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/JavaHBaseContext.scala @@ -25,6 +25,8 @@ import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.java.function.{FlatMapFunction, Function, VoidFunction} import org.apache.spark.streaming.api.java.JavaDStream +import java.lang.Iterable + import scala.collection.JavaConversions._ import scala.reflect.ClassTag @@ -103,11 +105,17 @@ class JavaHBaseContext(@transient jsc: JavaSparkContext, f: FlatMapFunction[(java.util.Iterator[T], Connection), R]): JavaRDD[R] = { - def fn = (it: Iterator[T], conn: Connection) => + def fn = (it: Iterator[T], conn: Connection) => { asScalaIterator( - f.call((asJavaIterator(it), conn)).iterator() + // the return type is different in spark 1.x & 2.x, we handle both cases + f.call(asJavaIterator(it), conn) match { + // spark 1.x + case iterable: Iterable[R] => iterable.iterator() + // spark 2.x + case iterator: Iterator[R] => iterator + } ) - + } JavaRDD.fromRDD(hbaseContext.mapPartitions(javaRdd.rdd, (iterator: Iterator[T], connection: Connection) => fn(iterator, 
connection))(fakeClassTag[R]))(fakeClassTag[R]) diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala index 7761acb..b227e2b 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/HBaseTableScanRDD.scala @@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.spark.hbase._ import org.apache.hadoop.hbase.spark.datasources.HBaseResources._ import org.apache.hadoop.hbase.util.ShutdownHookManager import org.apache.spark.sql.datasources.hbase.Field -import org.apache.spark.{SparkEnv, TaskContext, Logging, Partition} +import org.apache.spark.{SparkEnv, TaskContext, Partition} import org.apache.spark.rdd.RDD import scala.collection.mutable @@ -34,7 +34,8 @@ class HBaseTableScanRDD(relation: HBaseRelation, val hbaseContext: HBaseContext, @transient val filter: Option[SparkSQLPushDownFilter] = None, val columns: Seq[Field] = Seq.empty - )extends RDD[Result](relation.sqlContext.sparkContext, Nil) with Logging { + ) extends RDD[Result](relation.sqlContext.sparkContext, Nil) + { private def sparkConf = SparkEnv.get.conf @transient var ranges = Seq.empty[Range] @transient var points = Seq.empty[Array[Byte]] diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala index 851fb66..8b67534 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/JavaBytesEncoder.scala @@ -17,9 +17,9 @@ package org.apache.hadoop.hbase.spark.datasources +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging import org.apache.spark.sql.types._ /** @@ -102,4 +102,4 @@ object JavaBytesEncoder extends Enumeration with Logging{ new NaiveEncoder() } } -} \ No newline at end of file +} diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala index 3137717..99bc2a9 100644 --- a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/datasources/NaiveEncoder.scala @@ -17,9 +17,9 @@ package org.apache.hadoop.hbase.spark.datasources */ import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.hbase._ import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String diff --git a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala deleted file mode 100644 index 1e56a3d..0000000 --- a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.datasources.hbase - -import org.apache.spark.sql.catalyst.SqlLexical -import org.apache.spark.sql.catalyst.util.DataTypeParser -import org.apache.spark.sql.types.DataType - -object DataTypeParserWrapper { - lazy val dataTypeParser = new DataTypeParser { - override val lexical = new SqlLexical - } - - def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString) -} diff --git a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/HBaseTableCatalog.scala b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/HBaseTableCatalog.scala index c2d611f..5eaedbd 100644 --- a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/HBaseTableCatalog.scala +++ b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/HBaseTableCatalog.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.datasources.hbase import org.apache.avro.Schema +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.SchemaConverters import org.apache.hadoop.hbase.spark.datasources._ import org.apache.hadoop.hbase.spark.hbase._ import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging -import org.apache.spark.sql.catalyst.util.DataTypeParser import org.apache.spark.sql.types._ import org.json4s.jackson.JsonMethods._ @@ -77,7 +76,7 @@ case class Field( } val dt = { - sType.map(DataTypeParser.parse(_)).getOrElse{ + sType.map(DataTypeParserWrapper.parse(_)).getOrElse{ schema.map{ x=> SchemaConverters.toSqlType(x).dataType }.get diff --git a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/Utils.scala b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/Utils.scala index 73d054d..160c6f9 100644 --- a/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/Utils.scala +++ b/hbase-spark/src/main/scala/org/apache/spark/sql/datasources/hbase/Utils.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.datasources.hbase import org.apache.hadoop.hbase.spark.AvroSerdes import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.sql.execution.SparkSqlSerializer import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -57,8 +56,8 @@ object Utils { val newArray = new Array[Byte](length) System.arraycopy(src, offset, newArray, 0, length) newArray - // TODO: add more data type support - case _ => SparkSqlSerializer.deserialize[Any](src) + // TODO: SparkSqlSerializer.deserialize[Any](src) + case _ => throw new Exception(s"unsupported data type ${f.dt}") } } } diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala index 795ce6d..05b9b38 100644 --- 
a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/BulkLoadSuite.scala @@ -21,10 +21,11 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hbase.client.{Get, ConnectionFactory} import org.apache.hadoop.hbase.io.hfile.{CacheConfig, HFile} import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.{HConstants, CellUtil, HBaseTestingUtility, TableName} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ -import org.apache.spark.{SparkContext, Logging} +import org.apache.spark.SparkContext import org.junit.rules.TemporaryFolder import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala index 0f8baed..f8b053e 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DefaultSourceSuite.scala @@ -20,13 +20,14 @@ package org.apache.hadoop.hbase.spark import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.hadoop.hbase.client.{ConnectionFactory, Put} +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName} import org.apache.spark.sql.datasources.hbase.HBaseTableCatalog import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, SQLContext} -import org.apache.spark.{Logging, SparkConf, SparkContext} +import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} case class HBaseRecord( @@ -377,8 +378,8 @@ BeforeAndAfterEach with BeforeAndAfterAll with Logging { assert(results.length == 2) - assert(executionRules.dynamicLogicExpression.toExpressionString. - equals("( KEY_FIELD <= 0 AND KEY_FIELD >= 1 )")) + val expr = executionRules.dynamicLogicExpression.toExpressionString + assert(expr.equals("( ( KEY_FIELD isNotNull AND KEY_FIELD <= 0 ) AND KEY_FIELD >= 1 )"), expr) assert(executionRules.rowKeyFilter.points.size == 0) assert(executionRules.rowKeyFilter.ranges.size == 1) @@ -653,8 +654,9 @@ BeforeAndAfterEach with BeforeAndAfterAll with Logging { assert(localResult(0).getInt(2) == 8) val executionRules = DefaultSourceStaticUtils.lastFiveExecutionRules.poll() - assert(executionRules.dynamicLogicExpression.toExpressionString. 
- equals("( I_FIELD > 0 AND I_FIELD < 1 )")) + val expr = executionRules.dynamicLogicExpression.toExpressionString + logInfo(expr) + assert(expr.equals("( ( I_FIELD isNotNull AND I_FIELD > 0 ) AND I_FIELD < 1 )"), expr) } diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala index b9c15ce..f3abc62 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/DynamicLogicExpressionSuite.scala @@ -19,9 +19,9 @@ package org.apache.hadoop.hbase.spark import java.util +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.{HBaseSparkConf, JavaBytesEncoder} import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging import org.apache.spark.sql.types._ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala index 49e2f6c..f9e24c8 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseCatalogSuite.scala @@ -17,9 +17,9 @@ package org.apache.hadoop.hbase.spark +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.spark.datasources.{DoubleSerDes, SerDes} import org.apache.hadoop.hbase.util.Bytes -import org.apache.spark.Logging import org.apache.spark.sql.datasources.hbase.{DataTypeParserWrapper, HBaseTableCatalog} import org.apache.spark.sql.types._ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCacheSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCacheSuite.scala index 6ebf044..3f7697f 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCacheSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseConnectionCacheSuite.scala @@ -24,7 +24,7 @@ import org.apache.hadoop.hbase.client.{BufferedMutator, Table, RegionLocator, Connection, BufferedMutatorParams, Admin} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.TableName -import org.apache.spark.Logging +import org.apache.hadoop.hbase.spark.Logging import org.scalatest.FunSuite case class HBaseConnectionKeyMocker (confId: Int) extends HBaseConnectionKey (null) { diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala index 1e1e52d..6e6312e 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseContextSuite.scala @@ -20,7 +20,8 @@ import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{ CellUtil, TableName, HBaseTestingUtility} -import org.apache.spark.{SparkException, Logging, SparkContext} +import org.apache.hadoop.hbase.spark.Logging +import org.apache.spark.{SparkException, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class 
HBaseContextSuite extends FunSuite with diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala index 3b14c35..2ad640d 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseDStreamFunctionsSuite.scala @@ -17,11 +17,12 @@ package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.client._ +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseTestingUtility} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Milliseconds, StreamingContext} -import org.apache.spark.{SparkContext, Logging} +import org.apache.spark.SparkContext import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} @@ -139,4 +140,4 @@ BeforeAndAfterEach with BeforeAndAfterAll with Logging { } } -} \ No newline at end of file +} diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala index 89148c3..17921c3 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/HBaseRDDFunctionsSuite.scala @@ -17,10 +17,11 @@ package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.client._ +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseTestingUtility} import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ -import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.SparkContext import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} import scala.collection.mutable diff --git a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala index d33ced9..d573279 100644 --- a/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala +++ b/hbase-spark/src/test/scala/org/apache/hadoop/hbase/spark/PartitionFilterSuite.scala @@ -18,10 +18,11 @@ package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.spark.datasources.HBaseSparkConf +import org.apache.hadoop.hbase.spark.Logging import org.apache.hadoop.hbase.{TableName, HBaseTestingUtility} import org.apache.spark.sql.datasources.hbase.HBaseTableCatalog import org.apache.spark.sql.{DataFrame, SQLContext} -import org.apache.spark.{SparkConf, SparkContext, Logging} +import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} case class FilterRangeRecord( diff --git a/pom.xml b/pom.xml index 285e358..c9da131 100644 --- a/pom.xml +++ b/pom.xml @@ -69,6 +69,10 @@ hbase-protocol hbase-client hbase-hadoop-compat + hbase-spark2.0-compat + hbase-spark2.0-compat-scala-2.10 + hbase-spark1.6-compat + hbase-spark1.6-compat-scala-2.10 hbase-common hbase-procedure hbase-endpoint @@ -83,6 +87,9 @@ hbase-external-blockcache hbase-shaded hbase-spark + hbase-spark-scala-2.10 + hbase-spark-1.6 + hbase-spark-1.6-scala-2.10 hbase-archetypes 2.7.1 3.0.0-alpha1 + 2.0.0 + 1.6.0 @@ -1233,6 +1242,7 @@ 4.4.4 
3.1.2 12.0.1 + 2.0.2 1.9.13 5.5.23 2.2.2 @@ -1389,6 +1399,26 @@ org.apache.hbase + hbase-spark1.6-compat + ${project.version} + + + org.apache.hbase + hbase-spark1.6-compat_2.10 + ${project.version} + + + org.apache.hbase + hbase-spark2.0-compat + ${project.version} + + + org.apache.hbase + hbase-spark2.0-compat_2.10 + ${project.version} + + + org.apache.hbase hbase-hadoop-compat ${project.version} test-jar diff --git a/src/main/asciidoc/_chapters/spark.adoc b/src/main/asciidoc/_chapters/spark.adoc index 774d137..921e636 100644 --- a/src/main/asciidoc/_chapters/spark.adoc +++ b/src/main/asciidoc/_chapters/spark.adoc @@ -44,6 +44,8 @@ Spark Bulk Load:: The ability to write directly to HBase HFiles for bulk insertion into HBase SparkSQL/DataFrames:: The ability to write SparkSQL that draws on tables that are represented in HBase. +Artifacts for various Spark / Scala combinations:: + Location of artifacts for various Spark (1.6 vs. 2.0) / Scala (2.10 vs. 2.11) combinations The following sections will walk through examples of all these interaction points. @@ -687,4 +689,16 @@ The data frame `df` returned by the `withCatalog` function can be used to access the table. After loading the `df` DataFrame, users can query data. `registerTempTable` registers the `df` DataFrame as a temporary table named avrotable. The `sqlContext.sql` function allows the user to execute SQL queries. -==== \ No newline at end of file +==== + +== Artifacts for various Spark / Scala combinations + +There are two active Spark releases: 1.6 and 2.0. Both Scala 2.10 and Scala 2.11 are supported for each of these releases. +To allow downstream projects to easily locate artifacts for the four combinations of Spark release and Scala version, +the build process generates four jars, e.g.: + +hbase-spark-scala-2.10/target/hbase-spark-2.0.2_2.10-2.0.0-SNAPSHOT.jar +hbase-spark/target/hbase-spark-2.0.2_2.11-2.0.0-SNAPSHOT.jar +hbase-spark-1.6-scala-2.10/target/hbase-spark-1.6.1_2.10-2.0.0-SNAPSHOT.jar +hbase-spark-1.6/target/hbase-spark-1.6.1_2.11-2.0.0-SNAPSHOT.jar + diff --git a/hbase-spark1.6-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala b/hbase-spark1.6-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala new file mode 100644 index 0000000..1e56a3d --- /dev/null +++ b/hbase-spark1.6-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.datasources.hbase + +import org.apache.spark.sql.catalyst.SqlLexical +import org.apache.spark.sql.catalyst.util.DataTypeParser +import org.apache.spark.sql.types.DataType + +object DataTypeParserWrapper { + lazy val dataTypeParser = new DataTypeParser { + override val lexical = new SqlLexical + } + + def parse(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString) +} diff --git a/hbase-spark1.6-compat/pom.xml b/hbase-spark1.6-compat/pom.xml new file mode 100644 index 0000000..68b7b23 --- /dev/null +++ b/hbase-spark1.6-compat/pom.xml @@ -0,0 +1,192 @@ + + + + 4.0.0 + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. + + + hbase-spark1.6-compat + Apache HBase - Spark 1.6 Compatibility + + Interfaces to be implemented in order to smooth + over spark version differences + + + 2.11.8 + 2.11 + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + + maven-assembly-plugin + ${maven.assembly.version} + + true + + + + + org.apache.maven.plugins + maven-source-plugin + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + [3.2,) + + compile + + + + + + + + + + + + + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark-one.version} + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark-one.version} + provided + + + org.apache.hbase + hbase-annotations + test-jar + test + + + + commons-logging + commons-logging + + + org.apache.commons + commons-math + + + + + + + skipSparkCompatTests + + + skipSparkCompatTests + + + + true + + + + + diff --git a/hbase-spark1.6-compat-scala-2.10/pom.xml b/hbase-spark1.6-compat-scala-2.10/pom.xml new file mode 100644 index 0000000..d6ef9b0 --- /dev/null +++ b/hbase-spark1.6-compat-scala-2.10/pom.xml @@ -0,0 +1,210 @@ + + + + 4.0.0 + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. 
+ + + hbase-spark1.6-compat_2.10 + Apache HBase - Spark 1.6 Compatibility with Scala 2.10 + + Interfaces to be implemented in order to smooth + over spark version differences + + + 2.10.4 + 2.10 + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + validate + + add-source + + + + ../hbase-spark1.6-compat/src/main/scala + + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + + maven-assembly-plugin + ${maven.assembly.version} + + true + + + + + org.apache.maven.plugins + maven-source-plugin + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + [3.2,) + + compile + + + + + + + + + + + + + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark-one.version} + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark-one.version} + provided + + + org.apache.hbase + hbase-annotations + test-jar + test + + + + commons-logging + commons-logging + + + org.apache.commons + commons-math + + + + + + + skipSparkCompatTests + + + skipSparkCompatTests + + + + true + + + + + diff --git a/hbase-spark2.0-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala b/hbase-spark2.0-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala new file mode 100644 index 0000000..50e281e --- /dev/null +++ b/hbase-spark2.0-compat/src/main/scala/org/apache/spark/sql/datasources/hbase/DataTypeParserWrapper.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.datasources.hbase + +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.types.DataType + +/* + * This class is to isolate type incompatibilities between Spark 1.6 and Spark 2.0 + */ +object DataTypeParserWrapper { + /* + lazy val dataTypeParser = new DataTypeParser { + override val lexical = new SqlLexical + } + */ + + def parse(dataTypeString: String): DataType = CatalystSqlParser.parseDataType(dataTypeString) +} diff --git a/hbase-spark2.0-compat/pom.xml b/hbase-spark2.0-compat/pom.xml new file mode 100644 index 0000000..7e3d70a --- /dev/null +++ b/hbase-spark2.0-compat/pom.xml @@ -0,0 +1,192 @@ + + + + 4.0.0 + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. 
+ + + hbase-spark2.0-compat + Apache HBase - Spark Compatibility + + Interfaces to be implemented in order to smooth + over spark version differences + + + 2.11.8 + 2.11 + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + + maven-assembly-plugin + ${maven.assembly.version} + + true + + + + + org.apache.maven.plugins + maven-source-plugin + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + [3.2,) + + compile + + + + + + + + + + + + + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark-two.version} + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark-two.version} + provided + + + org.apache.hbase + hbase-annotations + test-jar + test + + + + commons-logging + commons-logging + + + org.apache.commons + commons-math + + + + + + + skipSparkCompatTests + + + skipSparkCompatTests + + + + true + + + + + diff --git a/hbase-spark2.0-compat-scala-2.10/pom.xml b/hbase-spark2.0-compat-scala-2.10/pom.xml new file mode 100644 index 0000000..d2c24fd --- /dev/null +++ b/hbase-spark2.0-compat-scala-2.10/pom.xml @@ -0,0 +1,210 @@ + + + + 4.0.0 + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. + + + hbase-spark2.0-compat_2.10 + Apache HBase - Spark 2.0 Compatibility with Scala 2.10 + + Interfaces to be implemented in order to smooth + over spark version differences + + + 2.10.4 + 2.10 + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + validate + + add-source + + + + ../hbase-spark2.0-compat/src/main/scala + + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + + maven-assembly-plugin + ${maven.assembly.version} + + true + + + + + org.apache.maven.plugins + maven-source-plugin + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + [3.2,) + + compile + + + + + + + + + + + + + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark-two.version} + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark-two.version} + provided + + + org.apache.hbase + hbase-annotations + test-jar + test + + + + commons-logging + commons-logging + + + org.apache.commons + commons-math + + + + + + + skipSparkCompatTests + + + skipSparkCompatTests + + + + true + + + + + diff --git a/hbase-spark-scala-2.10/pom.xml b/hbase-spark-scala-2.10/pom.xml new file mode 100644 index 0000000..89e4290 --- /dev/null +++ b/hbase-spark-scala-2.10/pom.xml @@ -0,0 +1,753 @@ + + + + + + 4.0.0 + + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. 
+ + hbase-spark-${spark.version}_2.10 + Apache HBase - Spark 2.0 with Scala 2.10 + + + 2.10.4 + 2.10 + true + ${project.basedir}/.. + 1.7.6 + + + + + + + javax.servlet + javax.servlet-api + 3.0.1 + test + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + com.google.code.findbugs + jsr305 + 1.3.9 + provided + true + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${spark.version} + test-jar + tests + test + + + junit + junit + test + + + + org.scalatest + scalatest_${scala.binary.version} + 2.2.4 + test + + + + org.scalamock + scalamock-scalatest-support_${scala.binary.version} + 3.1.4 + test + + + + org.apache.hadoop + hadoop-client + ${hadoop-two.version} + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + org.apache.hadoop + hadoop-common + ${hadoop-two.version} + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + com.google.code.findbugs + jsr305 + + + + + + org.apache.hadoop + hadoop-common + ${hadoop-two.version} + test-jar + test + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + com.google.code.findbugs + jsr305 + + + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop-two.version} + test-jar + test + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + xerces + xercesImpl + + + + + + org.apache.hbase + hbase-client + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + org.apache.hbase + hbase-protocol + ${project.version} + + + + org.apache.hbase + hbase-annotations + ${project.version} + test-jar + test + + + + org.apache.hbase + hbase-hadoop-compat + ${project.version} + test + test-jar + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + 
org.apache.hbase + hbase-hadoop2-compat + ${project.version} + test + test-jar + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + org.apache.hbase + hbase-server + ${project.version} + + + org.apache.hbase + hbase-server + ${project.version} + test + test-jar + + + org.apache.hbase + hbase-it + ${project.version} + test-jar + test + + + com.google.protobuf + protobuf-java + + + commons-logging + commons-logging + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test-jar + test + + + org.apache.avro + avro + ${avro.version} + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + + org.scalatest + scalatest-maven-plugin + 1.0 + + ${project.build.directory}/surefire-reports + . + WDF TestSuite.txt + false + + + + test + test + + test + + + true + + + + integration-test + integration-test + + test + + + Integration-Test + + -Xmx1536m -XX:ReservedCodeCacheSize=512m + + false + + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + validate + + add-source + + + + ../hbase-spark/src/main/scala + ../hbase-spark/src/main/java + + + + + add-test-source + validate + + add-test-source + + + + ../hbase-spark/src/test/scala + + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + banned-jsr305 + + enforce + + + false + + + + + + + + + + spark-2.0 + + + !spark.profile + + + + 2.0.2 + + + + org.apache.hbase + hbase-spark2.0-compat_2.10 + ${project.version} + + + + + + skipSparkTests + + + skipSparkTests + + + + true + true + + + + compile-protobuf + + + compile-protobuf + + + + + + org.xolstice.maven.plugins + protobuf-maven-plugin + + + compile-protoc + generate-sources + + compile + + + + + + + + + diff --git a/hbase-spark-1.6/pom.xml b/hbase-spark-1.6/pom.xml new file mode 100644 index 0000000..938b401 --- /dev/null +++ b/hbase-spark-1.6/pom.xml @@ -0,0 +1,753 @@ + + + + + + 4.0.0 + + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. + + hbase-spark-1.6.1_2.11 + Apache HBase - Spark 1.6.1 with Scala 2.11 + + + 2.11.4 + 2.11 + true + ${project.basedir}/.. 
+ 1.7.6 + + + + + + + javax.servlet + javax.servlet-api + 3.0.1 + test + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + + org.apache.spark + spark-core_${scala.binary.version} + 1.6.1 + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + com.google.code.findbugs + jsr305 + 1.3.9 + provided + true + + + org.apache.spark + spark-sql_${scala.binary.version} + 1.6.1 + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + 1.6.1 + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + 1.6.1 + test-jar + tests + test + + + junit + junit + test + + + + org.scalatest + scalatest_${scala.binary.version} + 2.2.4 + test + + + + org.scalamock + scalamock-scalatest-support_${scala.binary.version} + 3.1.4 + test + + + + org.apache.hadoop + hadoop-client + ${hadoop-two.version} + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + org.apache.hadoop + hadoop-common + ${hadoop-two.version} + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + com.google.code.findbugs + jsr305 + + + + + + org.apache.hadoop + hadoop-common + ${hadoop-two.version} + test-jar + test + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + com.google.code.findbugs + jsr305 + + + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop-two.version} + test-jar + test + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + xerces + xercesImpl + + + + + + org.apache.hbase + hbase-client + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + org.apache.hbase + hbase-protocol + ${project.version} + + + + org.apache.hbase + hbase-annotations + ${project.version} + test-jar + test + + + + org.apache.hbase + hbase-hadoop-compat + ${project.version} + test + test-jar + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + org.apache.hbase + hbase-hadoop2-compat + ${project.version} + test + test-jar + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + 
slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + org.apache.hbase + hbase-server + ${project.version} + + + org.apache.hbase + hbase-server + ${project.version} + test + test-jar + + + org.apache.hbase + hbase-it + ${project.version} + test-jar + test + + + com.google.protobuf + protobuf-java + + + commons-logging + commons-logging + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test-jar + test + + + org.apache.avro + avro + ${avro.version} + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + + org.scalatest + scalatest-maven-plugin + 1.0 + + ${project.build.directory}/surefire-reports + . + WDF TestSuite.txt + false + + + + test + test + + test + + + true + + + + integration-test + integration-test + + test + + + Integration-Test + + -Xmx1536m -XX:ReservedCodeCacheSize=512m + + false + + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + validate + + add-source + + + + ../hbase-spark/src/main/scala + ../hbase-spark/src/main/java + + + + + add-test-source + validate + + add-test-source + + + + ../hbase-spark/src/test/scala + + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + banned-jsr305 + + enforce + + + false + + + + + + + + + + spark-1.6 + + + !spark.profile + + + + 1.6.0 + + + + org.apache.hbase + hbase-spark1.6-compat + ${project.version} + + + + + + skipSparkTests + + + skipSparkTests + + + + true + true + + + + compile-protobuf + + + compile-protobuf + + + + + + org.xolstice.maven.plugins + protobuf-maven-plugin + + + compile-protoc + generate-sources + + compile + + + + + + + + + diff --git a/hbase-spark-1.6-scala-2.10/pom.xml b/hbase-spark-1.6-scala-2.10/pom.xml new file mode 100644 index 0000000..9962b19 --- /dev/null +++ b/hbase-spark-1.6-scala-2.10/pom.xml @@ -0,0 +1,753 @@ + + + + + + 4.0.0 + + + hbase + org.apache.hbase + 2.0.0-SNAPSHOT + .. + + hbase-spark-1.6.1_2.10 + Apache HBase - Spark 1.6.1 with Scala 2.10 + + + 2.10.4 + 2.10 + true + ${project.basedir}/.. 
+ 1.7.6 + + + + + + + javax.servlet + javax.servlet-api + 3.0.1 + test + + + + + org.scala-lang + scala-library + ${scala.version} + provided + + + + org.apache.spark + spark-core_${scala.binary.version} + 1.6.1 + provided + + + + org.scala-lang + scala-library + + + + org.scala-lang + scalap + + + com.google.code.findbugs + jsr305 + + + + + com.google.code.findbugs + jsr305 + 1.3.9 + provided + true + + + org.apache.spark + spark-sql_${scala.binary.version} + 1.6.1 + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + 1.6.1 + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + 1.6.1 + test-jar + tests + test + + + junit + junit + test + + + + org.scalatest + scalatest_${scala.binary.version} + 2.2.4 + test + + + + org.scalamock + scalamock-scalatest-support_${scala.binary.version} + 3.1.4 + test + + + + org.apache.hadoop + hadoop-client + ${hadoop-two.version} + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + org.apache.hadoop + hadoop-common + ${hadoop-two.version} + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + com.google.code.findbugs + jsr305 + + + + + + org.apache.hadoop + hadoop-common + ${hadoop-two.version} + test-jar + test + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + com.google.code.findbugs + jsr305 + + + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop-two.version} + test-jar + test + + + log4j + log4j + + + javax.servlet + servlet-api + + + javax.servlet.jsp + jsp-api + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + xerces + xercesImpl + + + + + + org.apache.hbase + hbase-client + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + org.apache.hbase + hbase-protocol + ${project.version} + + + + org.apache.hbase + hbase-annotations + ${project.version} + test-jar + test + + + + org.apache.hbase + hbase-hadoop-compat + ${project.version} + test + test-jar + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + + org.apache.hbase + hbase-hadoop2-compat + ${project.version} + test + test-jar + + + log4j + log4j + + + org.apache.thrift + thrift + + + org.jruby + jruby-complete + + + org.slf4j + 
slf4j-log4j12 + + + org.mortbay.jetty + jsp-2.1 + + + org.mortbay.jetty + jsp-api-2.1 + + + org.mortbay.jetty + servlet-api-2.5 + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + tomcat + jasper-compiler + + + org.jruby + jruby-complete + + + org.jboss.netty + netty + + + io.netty + netty + + + + + org.apache.hbase + hbase-server + ${project.version} + + + org.apache.hbase + hbase-server + ${project.version} + test + test-jar + + + org.apache.hbase + hbase-it + ${project.version} + test-jar + test + + + com.google.protobuf + protobuf-java + + + commons-logging + commons-logging + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test-jar + test + + + org.apache.avro + avro + ${avro.version} + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + net.alchim31.maven + scala-maven-plugin + 3.2.0 + + ${project.build.sourceEncoding} + ${scala.version} + + -feature + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + + org.scalatest + scalatest-maven-plugin + 1.0 + + ${project.build.directory}/surefire-reports + . + WDF TestSuite.txt + false + + + + test + test + + test + + + true + + + + integration-test + integration-test + + test + + + Integration-Test + + -Xmx1536m -XX:ReservedCodeCacheSize=512m + + false + + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + validate + + add-source + + + + ../hbase-spark/src/main/scala + ../hbase-spark/src/main/java + + + + + add-test-source + validate + + add-test-source + + + + ../hbase-spark/src/test/scala + + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + banned-jsr305 + + enforce + + + false + + + + + + + + + + spark-1.6 + + + !spark.profile + + + + 1.6.0 + + + + org.apache.hbase + hbase-spark1.6-compat_2.10 + ${project.version} + + + + + + skipSparkTests + + + skipSparkTests + + + + true + true + + + + compile-protobuf + + + compile-protobuf + + + + + + org.xolstice.maven.plugins + protobuf-maven-plugin + + + compile-protoc + generate-sources + + compile + + + + + + + + + diff --git a/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala new file mode 100644 index 0000000..d6b142f --- /dev/null +++ b/hbase-spark/src/main/scala/org/apache/hadoop/hbase/spark/Logging.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.spark + +import org.slf4j.{Logger, LoggerFactory} + +/** + * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows + * logging messages at different levels using methods that only evaluate parameters lazily if the + * log level is enabled. + * Logging became private in Spark 2.0, so a local copy of the trait is kept here + * to isolate that incompatibility across Spark releases. + */ +trait Logging { + + // Make the log field transient so that objects with Logging can + // be serialized and used on another machine + @transient private var log_ : Logger = null + + // Method to get the logger name for this object + protected def logName = { + // Ignore trailing $'s in the class names for Scala objects + this.getClass.getName.stripSuffix("$") + } + + // Method to get or create the logger for this object + protected def log: Logger = { + if (log_ == null) { + initializeLogIfNecessary(false) + log_ = LoggerFactory.getLogger(logName) + } + log_ + } + + // Log methods that take only a String + protected def logInfo(msg: => String) { + if (log.isInfoEnabled) log.info(msg) + } + + protected def logDebug(msg: => String) { + if (log.isDebugEnabled) log.debug(msg) + } + + protected def logTrace(msg: => String) { + if (log.isTraceEnabled) log.trace(msg) + } + + protected def logWarning(msg: => String) { + if (log.isWarnEnabled) log.warn(msg) + } + + protected def logError(msg: => String) { + if (log.isErrorEnabled) log.error(msg) + } + + // Log methods that take Throwables (Exceptions/Errors) too + protected def logInfo(msg: => String, throwable: Throwable) { + if (log.isInfoEnabled) log.info(msg, throwable) + } + + protected def logDebug(msg: => String, throwable: Throwable) { + if (log.isDebugEnabled) log.debug(msg, throwable) + } + + protected def logTrace(msg: => String, throwable: Throwable) { + if (log.isTraceEnabled) log.trace(msg, throwable) + } + + protected def logWarning(msg: => String, throwable: Throwable) { + if (log.isWarnEnabled) log.warn(msg, throwable) + } + + protected def logError(msg: => String, throwable: Throwable) { + if (log.isErrorEnabled) log.error(msg, throwable) + } + + protected def isTraceEnabled(): Boolean = { + log.isTraceEnabled + } + + protected def initializeLogIfNecessary(isInterpreter: Boolean): Unit = { + if (!Logging.initialized) { + Logging.initLock.synchronized { + if (!Logging.initialized) { + initializeLogging(isInterpreter) + } + } + } + } + + private def initializeLogging(isInterpreter: Boolean): Unit = { + // Don't use a logger in here, as this is itself occurring during initialization of a logger. + // Unlike Spark's original trait, no default log4j properties file is loaded here; + // log4j configuration is left to the hosting application. + Logging.initialized = true + + // Force a call into slf4j to initialize it. Avoids this happening from multiple threads + // and triggering this: http://mailman.qos.ch/pipermail/slf4j-dev/2010-April/002956.html + log + } +} + +private object Logging { + @volatile private var initialized = false + val initLock = new Object() +}
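To illustrate the org.apache.hadoop.hbase.spark.Logging trait added above, here is a minimal sketch of downstream usage; the ScanDriver class is hypothetical, but the trait and its logInfo signature are exactly those added by this patch:

import org.apache.hadoop.hbase.spark.Logging

// Hypothetical example class: mixes in the HBase-local Logging trait in place of
// org.apache.spark.Logging, which is no longer public in Spark 2.0.
class ScanDriver extends Logging {
  def run(): Unit = {
    // msg is a by-name parameter (msg: => String), so the interpolated string is
    // only constructed when INFO logging is actually enabled.
    logInfo(s"starting scan at ${System.currentTimeMillis()}")
  }
}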
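Likewise, a sketch of how the two DataTypeParserWrapper shims introduced by this patch are consumed. Both compat modules define the same object in org.apache.spark.sql.datasources.hbase, so callers such as HBaseTableCatalog compile unchanged; the Spark-appropriate implementation is chosen by whichever compat jar is on the classpath:

object DataTypeParseExample {
  import org.apache.spark.sql.datasources.hbase.DataTypeParserWrapper
  import org.apache.spark.sql.types.DataType

  def main(args: Array[String]): Unit = {
    // Delegates to CatalystSqlParser.parseDataType on Spark 2.0, and to a
    // DataTypeParser/SqlLexical instance on Spark 1.6 (see the two shims above).
    val dt: DataType = DataTypeParserWrapper.parse("array<string>")
    println(dt) // ArrayType(StringType,true)
  }
}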
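Finally, as an illustration of the artifact naming scheme documented in the spark.adoc change: a downstream Maven project building against Spark 2.0 with Scala 2.11 would declare a dependency roughly as follows (a sketch, assuming the 2.0.0-SNAPSHOT version used throughout this patch):

<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-spark-2.0.2_2.11</artifactId>
  <version>2.0.0-SNAPSHOT</version>
</dependency>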