Details
-
Bug
-
Status: Resolved
-
Minor
-
Resolution: Invalid
-
2.4.0
-
None
-
None
Description
I am try to connect Apache Spark 2.4 using Java JDBC with Hive and getting
Logs :
org.apache.spark.sql.AnalysisException: cannot resolve '`XXX.yyy`' given input columns: error
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:88)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:85)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:288)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:268)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:268)
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:279)
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:289)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$1.apply(QueryPlan.scala:293)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
at scala.collection.AbstractTraversable.map(Traversable.scala:105)
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:293)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$6.apply(QueryPlan.scala:298)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:298)
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:268)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:85)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:78)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:78)
at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:91)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:52)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:67)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2884)
at org.apache.spark.sql.Dataset.select(Dataset.scala:1150)
at org.apache.spark.sql.Dataset.select(Dataset.scala:1168)
at org.apache.spark.sql.Dataset.select(Dataset.scala:1168)
at com.khan.vaquar.SparkHiveConnection.start_1(SparkHiveConnection.java:158)
at com.khan.vaquar.SparkHiveConnection.main(SparkHiveConnection.java:26)
19/03/07 14:19:35 INFO SparkContext: Invoking stop() from shutdown hook
x.x19/03/07 14:19:35 INFO SparkUI: Stopped Spark web UI at http://x:x.x.x:4040
19/03/07 14:19:35 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
19/03/07 14:19:35 INFO MemoryStore: MemoryStore cleared
19/03/07 14:19:35 INFO BlockManager: BlockManager stopped
19/03/07 14:19:35 INFO BlockManagerMaster: BlockManagerMaster stopped
19/03/07 14:19:35 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
19/03/07 14:19:35 INFO SparkContext: Successfully stopped SparkContext
19/03/07 14:19:35 INFO ShutdownHookManager: Shutdown hook called
While same connection I can print schema and its given proper values
– Schema.AAAA: string (nullable = true) |
|-- Schema.BBBB: date (nullable = true)
|-- Schema.CCCCC: date (nullable = true)
|-- Schema.DDDDD: integer (nullable = true)
|-- Schema.EEEEEEEE: integer (nullable = true)
|-- Schema.FFFFFFFFFFFF: integer (nullable = true)
|-- Schema.GGGGGGGGGGGG: integer (nullable = true)
|-- Schema.HHHHHHHHH: string (nullable = true)
Please find attached code
package com.khan.vaquar;
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.ml.feature.NGram;
import org.apache.spark.ml.util.DefaultParamsReader.Metadata;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
public class SparkHiveConnection {
public static void main(String[] args) {
SparkHiveConnection app = new SparkHiveConnection();
try
{ app.start_1(); }catch (Exception e)
{ e.printStackTrace(); }}
private void start_1() {
SparkConf conf = new SparkConf().setAppName("Checkpoint").setMaster("local[*]");
SparkContext sparkContext = new SparkContext(conf);
// We need to specify where Spark will save the checkpoint file. It can
// be an HDFS location.
sparkContext.setCheckpointDir("/tmp");
//
// SparkSession spark =
// SparkSession.builder().appName("SparkHiveExample").master("local[*]").getOrCreate();
String hiveLocation = "jdbc:hive2://1XXXXXXXXXXXXX:2181,1XXXXXXXXXY:2181,1XXXXXXXXXXXZ:2181/;serviceDiscoveryMode=zookeeper;zookeeperNameSpace=hiveserver2";
SparkSession spark = SparkSession.builder().appName("SparkHiveExample")
// .master("local[*]")
// .config("spark.sql.warehouse.dir", hiveLocation)
// .config("hive.metastore.uris",hiveLocation)//
// "thrift://localhost:9083"
// .config("hive.mapred.supports.subdirectories", "true")
// .config("spark.driver.allowMultipleContexts", "true")
// .config("mapreduce.input.fileinputformat.input.dir.recursive",
// "true")
// .config("checkpointLocation", "/tmp/hive") // <-- checkpoint
// directory
// .config(" spark.sql.warehouse.dir",
// "//1x.x.x.x/apps/hive/warehouse")
// .enableHiveSupport()
.getOrCreate();
StructType schema = DataTypes.createStructType(new StructField[]
{ DataTypes.createStructField("AAAA", DataTypes.StringType, true), DataTypes.createStructField("BBBB", DataTypes.TimestampType, true), DataTypes.createStructField("CCCCC", DataTypes.TimestampType, true), DataTypes.createStructField("DDDDD", DataTypes.StringType, true), DataTypes.createStructField("EEEEEEEE", DataTypes.StringType, true), DataTypes.createStructField("FFFFFFFFFFFF", DataTypes.StringType, true), DataTypes.createStructField("GGGGGGGGGGGG", DataTypes.StringType, true), DataTypes.createStructField("HHHHHHHHH", DataTypes.StringType, true) });
System.out.print("SQL Session--------------------");
Dataset<Row> jdbcDF = spark.read()
.format("jdbc")
.option("url", hiveLocation)
.option("dbtable", "schema.TableName")
.option("user", "username")
.option("password", "password")
.option("fetchsize", "20")
.option("inferSchema", false)
//.schema(schema)
// .option("driver", "org.apache.hadoop.hive.jdbc.HiveDriver")
.load();
System.out.print("able to connect------------------ ");
jdbcDF.printSchema();
System.out.print("Results ------------------");
jdbcDF.select("columnName").alias("alias").limit(2).show();
}
}