Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-17747

WeightCol support non-double datatypes

    XML | Word | Printable | JSON

    Details

    • Type: Improvement
    • Status: Resolved
    • Priority: Minor
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 2.2.0
    • Component/s: ML
    • Labels:
      None
    • Target Version/s:

      Description

      WeightCol currently supports only the double type; it should also accept other numeric types, such as Int.

      scala> df3.show(5)
      +-----+--------------------+------+
      |label|            features|weight|
      +-----+--------------------+------+
      |  0.0|(692,[127,128,129...|     1|
      |  1.0|(692,[158,159,160...|     1|
      |  1.0|(692,[124,125,126...|     1|
      |  1.0|(692,[152,153,154...|     1|
      |  1.0|(692,[151,152,153...|     1|
      +-----+--------------------+------+
      only showing top 5 rows
      
      
      scala> val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.3).setElasticNetParam(0.8)
      lr: org.apache.spark.ml.classification.LogisticRegression = logreg_ee0308a72919
      
      scala> val lrm = lr.fit(df3)
      16/09/20 15:46:12 WARN LogisticRegression: LogisticRegression training finished but the result is not converged because: max iterations reached
      lrm: org.apache.spark.ml.classification.LogisticRegressionModel = logreg_ee0308a72919
      
      scala> val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.3).setElasticNetParam(0.8).setWeightCol("weight")
      lr: org.apache.spark.ml.classification.LogisticRegression = logreg_ced7579d5680
      
      scala> val lrm = lr.fit(df3)
      16/09/20 15:46:27 WARN BlockManager: Putting block rdd_211_0 failed
      16/09/20 15:46:27 ERROR Executor: Exception in task 0.0 in stage 89.0 (TID 92)
      scala.MatchError: [0.0,1,(692,[127,128,129,130,131,154,155,156,157,158,159,181,182,183,184,185,186,187,188,189,207,208,209,210,211,212,213,214,215,216,217,235,236,237,238,239,240,241,242,243,244,245,262,263,264,265,266,267,268,269,270,271,272,273,289,290,291,292,293,294,295,296,297,300,301,302,316,317,318,319,320,321,328,329,330,343,344,345,346,347,348,349,356,357,358,371,372,373,374,384,385,386,399,400,401,412,413,414,426,427,428,429,440,441,442,454,455,456,457,466,467,468,469,470,482,483,484,493,494,495,496,497,510,511,512,520,521,522,523,538,539,540,547,548,549,550,566,567,568,569,570,571,572,573,574,575,576,577,578,594,595,596,597,598,599,600,601,602,603,604,622,623,624,625,626,627,628,629,630,651,652,653,654,655,656,657],[51.0,159.0,253.0,159.0,50.0,48.0,238.0,252.0,252.0,252.0,237.0,54.0,227.0,253.0,252.0,239.0,233.0,252.0,57.0,6.0,10.0,60.0,224.0,252.0,253.0,252.0,202.0,84.0,252.0,253.0,122.0,163.0,252.0,252.0,252.0,253.0,252.0,252.0,96.0,189.0,253.0,167.0,51.0,238.0,253.0,253.0,190.0,114.0,253.0,228.0,47.0,79.0,255.0,168.0,48.0,238.0,252.0,252.0,179.0,12.0,75.0,121.0,21.0,253.0,243.0,50.0,38.0,165.0,253.0,233.0,208.0,84.0,253.0,252.0,165.0,7.0,178.0,252.0,240.0,71.0,19.0,28.0,253.0,252.0,195.0,57.0,252.0,252.0,63.0,253.0,252.0,195.0,198.0,253.0,190.0,255.0,253.0,196.0,76.0,246.0,252.0,112.0,253.0,252.0,148.0,85.0,252.0,230.0,25.0,7.0,135.0,253.0,186.0,12.0,85.0,252.0,223.0,7.0,131.0,252.0,225.0,71.0,85.0,252.0,145.0,48.0,165.0,252.0,173.0,86.0,253.0,225.0,114.0,238.0,253.0,162.0,85.0,252.0,249.0,146.0,48.0,29.0,85.0,178.0,225.0,253.0,223.0,167.0,56.0,85.0,252.0,252.0,252.0,229.0,215.0,252.0,252.0,252.0,196.0,130.0,28.0,199.0,252.0,252.0,253.0,252.0,252.0,233.0,145.0,25.0,128.0,252.0,253.0,252.0,141.0,37.0])] (of class org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema)
      	at org.apache.spark.ml.classification.LogisticRegression$$anonfun$6.apply(LogisticRegression.scala:266)
      	at org.apache.spark.ml.classification.LogisticRegression$$anonfun$6.apply(LogisticRegression.scala:266)
      	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
      	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:214)
      	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:919)
      	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:910)
      	at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:866)
      	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:910)
      	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:668)
      	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:330)
      	at org.apache.spark.rdd.RDD.iterator(RDD.scala:281)
      	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
      	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
      	at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
      	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
      	at org.apache.spark.scheduler.Task.run(Task.scala:85)
      	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
      	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
      	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
      	at java.lang.Thread.run(Thread.java:745)
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                podongfeng zhengruifeng
                Reporter:
                podongfeng zhengruifeng
                Shepherd:
                Joseph K. Bradley
              • Votes:
                0 Vote for this issue
                Watchers:
                3 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: