Details
-
Bug
-
Status: Closed
-
Major
-
Resolution: Not A Bug
-
3.3.0
-
None
-
None
-
/usr/lib/spark/jars/spark-core_2.11-2.0.2.jar
Description
Spark 2.0 + tinkerpop-3.3.0
A simple program that pulls the first vertex out of the grateful-dead.kryo dataset and prints its property keys works with the standard computer. When the same traversal is processed using the SparkGraphComputer, however, the set of keys is empty.
// Reproduction script (Scala REPL / notebook style): loads grateful-dead.kryo through
// HadoopGraph, fetches the first vertex twice -- once with the default traversal source
// and once through SparkGraphComputer -- and prints the property keys seen in each case.
//
// pre-requisite:
// sudo -u zeppelin hadoop fs -copyFromLocal /tmp/grateful-dead.kryo grateful-dead.kryo
val inputHdfsLocation = "grateful-dead.kryo"

// Graph + Spark settings handed to GraphFactory below.
val props = Map[String, String](
  "gremlin.graph" -> "org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph"
  , "gremlin.hadoop.graphReader" -> "org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat"
  , "gremlin.hadoop.inputLocation" -> inputHdfsLocation
  , "gremlin.hadoop.outputLocation" -> "output"
  , "gremlin.hadoop.jarsInDistributedCache" -> "true"
  , "spark.master" -> "local[1]"
  , "spark.executor.memory" -> "1g"
  , "spark.serializer" -> "org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer"
  // , "spark.kryo.registrator" -> "org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator"
)

import org.apache.commons.configuration._

// Copy every key/value pair into a commons-configuration object.
val conf = new BaseConfiguration()
props.foreach( kv => conf.addProperty(kv._1, kv._2))

import org.apache.tinkerpop.gremlin.process.computer._
import org.apache.tinkerpop.gremlin.spark.process.computer._
import org.apache.tinkerpop.gremlin.structure.util._

val graph = GraphFactory.open(conf)

// Baseline: first vertex via the default (non-computer) traversal source.
val v = graph.traversal().V().next(1).get(0)
printf("vertex id = %s, keys = %s\n", v.id, v.keys())

// Same query, this time routed through SparkGraphComputer.
// NOTE(review): this issue was resolved as "Not A Bug"; presumably a vertex returned from a
// computer-backed traversal does not carry its properties, so keys would have to be
// requested inside the traversal itself -- confirm against the TinkerPop reference docs.
val computer = Computer.compute(classOf[SparkGraphComputer])
val v2 = graph.traversal().withComputer(computer).V().next(1).get(0)
printf("vertex id = %s, keys = %s\n", v2.id, v2.keys())
Above produces:
inputHdfsLocation: String = grateful-dead.kryo
props: scala.collection.immutable.Map[String,String] = Map(spark.serializer -> org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer, gremlin.hadoop.inputLocation -> grateful-dead.kryo, gremlin.hadoop.jarsInDistributedCache -> true, gremlin.hadoop.graphReader -> org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat, gremlin.graph -> org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph, gremlin.hadoop.outputLocation -> output, spark.master -> local[1], spark.executor.memory -> 1g)
import org.apache.commons.configuration._
conf: org.apache.commons.configuration.BaseConfiguration = org.apache.commons.configuration.BaseConfiguration@1849d0b7
import org.apache.tinkerpop.gremlin.process.computer._
import org.apache.tinkerpop.gremlin.spark.process.computer._
import org.apache.tinkerpop.gremlin.structure.util._
graph: org.apache.tinkerpop.gremlin.structure.Graph = hadoopgraph[gryoinputformat->no-writer]
v: org.apache.tinkerpop.gremlin.structure.Vertex = v[1]
vertex id = 1, keys = [name, songType, performances]
computer: org.apache.tinkerpop.gremlin.process.computer.Computer = sparkgraphcomputer
v2: org.apache.tinkerpop.gremlin.structure.Vertex = v[1]
vertex id = 1, keys = []
Notice that the set of keys is empty when the traversal is run with the SparkGraphComputer, but correct when it is run with the standard computer.