Uploaded image for project: 'Ignite'
  1. Ignite
  2. IGNITE-12269

[ML] The printTree method fails with IllegalArgumentException on the example

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Critical
    • Resolution: Fixed
    • 2.8
    • 2.8
    • ml
    • None
    •  The printTree method fails with IllegalArgumentException on the example
    • Release Notes Required

    Description

      This example

      package org.apache.ignite.examples.ml.tutorial;
      
      import java.io.FileNotFoundException;
      import org.apache.ignite.Ignite;
      import org.apache.ignite.IgniteCache;
      import org.apache.ignite.Ignition;
      import org.apache.ignite.ml.composition.ModelsComposition;
      import org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory;
      import org.apache.ignite.ml.composition.boosting.convergence.median.MedianOfMedianConvergenceCheckerFactory;
      import org.apache.ignite.ml.dataset.feature.extractor.Vectorizer;
      import org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer;
      import org.apache.ignite.ml.math.primitives.vector.Vector;
      import org.apache.ignite.ml.preprocessing.Preprocessor;
      import org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer;
      import org.apache.ignite.ml.preprocessing.encoding.EncoderType;
      import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
      import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
      import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer;
      import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator;
      import org.apache.ignite.ml.selection.scoring.metric.MetricName;
      import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter;
      import org.apache.ignite.ml.selection.split.TrainTestSplit;
      import org.apache.ignite.ml.trainers.DatasetTrainer;
      import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;
      
      /**
       * Tutorial step 11: fits a {@link GDBBinaryClassifierOnTreesTrainer} over a chain of preprocessors.
       * <p>
       * The example starts Ignite and obtains the passengers cache from {@code TitanicUtils}.</p>
       * <p>
       * On top of the vectorized rows it stacks string encoding, imputing, {@link MinMaxScalerTrainer} scaling and
       * {@link NormalizationTrainer} normalization.</p>
       * <p>
       * The resulting preprocessor feeds the boosting trainer, which is fitted on the train part of the split.</p>
       * <p>
       * Finally, the trained model is printed and {@link Evaluator} computes its accuracy on the test part.</p>
       */
      public class Step_11_Boosting {
          /**
           * Run example.
           *
           * @param args Command line arguments (not used).
           */
          public static void main(String[] args) {
              System.out.println();
              System.out.println(">>> Tutorial step 11 (Boosting) example started.");

              try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
                  try {
                      IgniteCache<Integer, Vector> passengers = TitanicUtils.readPassengers(ignite);

                      // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare"; column 1 is the label.
                      Vectorizer<Integer, Vector, Integer, Double> features
                          = new DummyVectorizer<Integer>(0, 3, 4, 5, 6, 8, 10).labeled(1);

                      // 0.75 is the split ratio passed to the splitter; its filters are used for fit and evaluation below.
                      TrainTestSplit<Integer, Vector> trainTestSplit = new TrainTestDatasetSplitter<Integer, Vector>()
                          .split(0.75);

                      Preprocessor<Integer, Vector> pipeline = buildPreprocessor(ignite, passengers, features);

                      // Create the boosting trainer with a median-of-median convergence checker.
                      DatasetTrainer<ModelsComposition, Double> boostingTrainer = new GDBBinaryClassifierOnTreesTrainer(0.01, 1000, 10, 0.)
                          .withCheckConvergenceStgyFactory(new MedianOfMedianConvergenceCheckerFactory(0.01));

                      // Fit the model on the train part only.
                      ModelsComposition model = boostingTrainer.fit(
                          ignite,
                          passengers,
                          trainTestSplit.getTrainFilter(),
                          pipeline
                      );

                      // String concatenation invokes the model's toString().
                      System.out.println("\n>>> Trained model: " + model);

                      double accuracy = Evaluator.evaluate(
                          passengers,
                          trainTestSplit.getTestFilter(),
                          model,
                          pipeline,
                          MetricName.ACCURACY
                      );

                      System.out.println("\n>>> Accuracy " + accuracy);
                      System.out.println("\n>>> Test Error " + (1 - accuracy));

                      System.out.println(">>> Tutorial step 11 (Boosting) example completed.");
                  }
                  catch (FileNotFoundException ex) {
                      ex.printStackTrace();
                  }
              }
              finally {
                  System.out.flush();
              }
          }

          /**
           * Builds the preprocessing chain: string encoder, imputer, min-max scaler, normalizer.
           * Each stage is fitted on the given cache and wraps the previous one.
           *
           * @param ignite Ignite instance.
           * @param passengers Cache with the passengers data.
           * @param features Vectorizer that is the base of the chain.
           * @return The last preprocessor of the chain (normalization).
           */
          private static Preprocessor<Integer, Vector> buildPreprocessor(
              Ignite ignite,
              IgniteCache<Integer, Vector> passengers,
              Vectorizer<Integer, Vector, Integer, Double> features
          ) {
              Preprocessor<Integer, Vector> encoded = new EncoderTrainer<Integer, Vector>()
                  .withEncoderType(EncoderType.STRING_ENCODER)
                  .withEncodedFeature(1)
                  .withEncodedFeature(6) // <--- Changed index here.
                  .fit(ignite, passengers, features);

              Preprocessor<Integer, Vector> imputed = new ImputerTrainer<Integer, Vector>()
                  .fit(ignite, passengers, encoded);

              Preprocessor<Integer, Vector> scaled = new MinMaxScalerTrainer<Integer, Vector>()
                  .fit(ignite, passengers, imputed);

              return new NormalizationTrainer<Integer, Vector>()
                  .withP(1)
                  .fit(ignite, passengers, scaled);
          }
      }
      
      

      fails with the following exception:

      Exception in thread "main" java.lang.IllegalArgumentException
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:105)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
          at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:328)
          at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:123)
          at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:118)
          at java.lang.String.valueOf(String.java:2994)
          at java.lang.StringBuilder.append(StringBuilder.java:131)
          at org.apache.ignite.ml.util.ModelTrace.lambda$fieldToString$1(ModelTrace.java:122)
          at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
          at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
          at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
          at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
          at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
          at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
          at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
          at org.apache.ignite.ml.util.ModelTrace.fieldToString(ModelTrace.java:123)
          at org.apache.ignite.ml.util.ModelTrace.lambda$toString$0(ModelTrace.java:97)
          at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
          at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
          at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
          at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
          at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
          at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
          at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
          at org.apache.ignite.ml.util.ModelTrace.toString(ModelTrace.java:98)
          at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:101)
          at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:93)
          at java.lang.String.valueOf(String.java:2994)
          at java.lang.StringBuilder.append(StringBuilder.java:131)
          at org.apache.ignite.examples.ml.tutorial.Step_11_Boosting.main(Step_11_Boosting.java:117)
      

       

       

      Attachments

        Issue Links

          Activity

            People

              zaleslaw Alexey Zinoviev
              zaleslaw Alexey Zinoviev
              Votes:
              0 Vote for this issue
              Watchers:
              1 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Time Tracking

                  Estimated:
                  Original Estimate - Not Specified
                  Not Specified
                  Remaining:
                  Remaining Estimate - 0h
                  0h
                  Logged:
                  Time Spent - 20m
                  20m