Uploaded image for project: 'Hivemall'
  1. Hivemall
  2. HIVEMALL-119

Fail to use xgboost on Hive

    XML | Word | Printable | JSON

    Details

      Description

      This error arises on [this branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), and probably on the head of master too.
      It seems that labels aren't set properly.

      make xgboost-native-local
      mvn package -Dmaven.test.skip=true
      docker-compose -f resources/docker/docker-compose.yml build
      docker-compose -f resources/docker/docker-compose.yml up -d && docker attach hivemall
      

      On Docker:

      bin/prepare_iris.sh
      hive
      

      On Hive:

      -- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar;
      -- source /opt/hivemall/resources/ddl/define-all.hive;
      add jar /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar;
      source /opt/hivemall/resources/ddl/define-additional.hive;
      
      set hivevar:f0_min=4.3;
      set hivevar:f0_max=7.9;
      set hivevar:f1_min=2.0;
      set hivevar:f1_max=4.4;
      set hivevar:f2_min=1.0;
      set hivevar:f2_max=6.9;
      set hivevar:f3_min=0.1;
      set hivevar:f3_max=2.5;
      
      use iris;
      create or replace view iris_scaled
      as
      select
        rowid, 
        label,
        add_bias(array(
           concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), 
           concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), 
           concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), 
           concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max}))
        )) as features
      from 
        iris_raw;
      
      -- select * from iris_scaled limit 3;
      -- 1       Iris-setosa     ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"]
      -- 2       Iris-setosa     ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"]
      -- 3       Iris-setosa     ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"]
      
      select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 1.0 else 0.0 end) from iris_scaled; -- got exception
      
      Failed with exception java.io.IOException:java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to java.lang.String
      [20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty
      org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty
      Check failed: (info.labels.size()) != (0) label set cannot be empty
              at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
              at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
              at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
              at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
              at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
              at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
              at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
              at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
              at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
              at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
              at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
              at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
              at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
              at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
              at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
              at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
              at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
              at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
              at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
              at java.lang.reflect.Method.invoke(Method.java:606)
              at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
              at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
      org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty
              at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
              at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
              at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
              at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
              at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
              at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
              at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
              at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
              at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
              at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
              at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
              at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
              at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
              at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
              at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
              at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
              at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
              at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
              at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
              at java.lang.reflect.Method.invoke(Method.java:606)
              at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
              at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
      Time taken: 3.375 seconds
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                maropu Takeshi Yamamuro
                Reporter:
                amaya ITO Ryuichi
              • Votes:
                0 Vote for this issue
                Watchers:
                3 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: