Details
Description
This example
package org.apache.ignite.examples.ml.tutorial; import java.io.FileNotFoundException; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCache; import org.apache.ignite.Ignition; import org.apache.ignite.ml.composition.ModelsComposition; import org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory; import org.apache.ignite.ml.composition.boosting.convergence.median.MedianOfMedianConvergenceCheckerFactory; import org.apache.ignite.ml.dataset.feature.extractor.Vectorizer; import org.apache.ignite.ml.dataset.feature.extractor.impl.DummyVectorizer; import org.apache.ignite.ml.math.primitives.vector.Vector; import org.apache.ignite.ml.preprocessing.Preprocessor; import org.apache.ignite.ml.preprocessing.encoding.EncoderTrainer; import org.apache.ignite.ml.preprocessing.encoding.EncoderType; import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer; import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer; import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer; import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator; import org.apache.ignite.ml.selection.scoring.metric.MetricName; import org.apache.ignite.ml.selection.split.TrainTestDatasetSplitter; import org.apache.ignite.ml.selection.split.TrainTestSplit; import org.apache.ignite.ml.trainers.DatasetTrainer; import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer; /** * {@link MinMaxScalerTrainer} and {@link NormalizationTrainer} are used in this example due to different values * distribution in columns and rows. 
* <p> * Code in this example launches Ignite grid and fills the cache with test data (based on Titanic passengers data).</p> * <p> * After that it defines preprocessors that extract features from an upstream data and perform other desired changes * over the extracted data, including the scaling.</p> * <p> * Then, it trains the model based on the processed data using decision tree classification.</p> * <p> * Finally, this example uses {@link Evaluator} functionality to compute metrics from predictions.</p> */ public class Step_11_Boosting { /** * Run example. */ public static void main(String[] args) { System.out.println(); System.out.println(">>> Tutorial step 11 (Boosting) example started."); try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) { try { IgniteCache<Integer, Vector> dataCache = TitanicUtils.readPassengers(ignite); // Extracts "pclass", "sibsp", "parch", "sex", "embarked", "age", "fare". final Vectorizer<Integer, Vector, Integer, Double> vectorizer = new DummyVectorizer<Integer>(0, 3, 4, 5, 6, 8, 10).labeled(1); TrainTestSplit<Integer, Vector> split = new TrainTestDatasetSplitter<Integer, Vector>() .split(0.75); Preprocessor<Integer, Vector> strEncoderPreprocessor = new EncoderTrainer<Integer, Vector>() .withEncoderType(EncoderType.STRING_ENCODER) .withEncodedFeature(1) .withEncodedFeature(6) // <--- Changed index here. .fit(ignite, dataCache, vectorizer ); Preprocessor<Integer, Vector> imputingPreprocessor = new ImputerTrainer<Integer, Vector>() .fit(ignite, dataCache, strEncoderPreprocessor ); Preprocessor<Integer, Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Vector>() .fit( ignite, dataCache, imputingPreprocessor ); Preprocessor<Integer, Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Vector>() .withP(1) .fit( ignite, dataCache, minMaxScalerPreprocessor ); // Create classification trainer. 
DatasetTrainer<ModelsComposition, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(0.01, 1000, 10, 0.) .withCheckConvergenceStgyFactory(new MedianOfMedianConvergenceCheckerFactory(0.01)); // Train decision tree model. ModelsComposition mdl = trainer.fit( ignite, dataCache, split.getTrainFilter(), normalizationPreprocessor ); System.out.println("\n>>> Trained model: " + mdl); double accuracy = Evaluator.evaluate( dataCache, split.getTestFilter(), mdl, normalizationPreprocessor, MetricName.ACCURACY ); System.out.println("\n>>> Accuracy " + accuracy); System.out.println("\n>>> Test Error " + (1 - accuracy)); System.out.println(">>> Tutorial step 11 (Boosting) example completed."); } catch (FileNotFoundException e) { e.printStackTrace(); } } finally { System.out.flush(); } } }
fails with the following exception when the trained model is printed:
Exception in thread "main" java.lang.IllegalArgumentException
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:105)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:100)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:102)
    at org.apache.ignite.ml.tree.DecisionTree.printTree(DecisionTree.java:328)
    at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:123)
    at org.apache.ignite.ml.tree.DecisionTreeConditionalNode.toString(DecisionTreeConditionalNode.java:118)
    at java.lang.String.valueOf(String.java:2994)
    at java.lang.StringBuilder.append(StringBuilder.java:131)
    at org.apache.ignite.ml.util.ModelTrace.lambda$fieldToString$1(ModelTrace.java:122)
    at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
    at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
    at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
    at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
    at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
    at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
    at org.apache.ignite.ml.util.ModelTrace.fieldToString(ModelTrace.java:123)
    at org.apache.ignite.ml.util.ModelTrace.lambda$toString$0(ModelTrace.java:97)
    at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
    at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1374)
    at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
    at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
    at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
    at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
    at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
    at org.apache.ignite.ml.util.ModelTrace.toString(ModelTrace.java:98)
    at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:101)
    at org.apache.ignite.ml.composition.ModelsComposition.toString(ModelsComposition.java:93)
    at java.lang.String.valueOf(String.java:2994)
    at java.lang.StringBuilder.append(StringBuilder.java:131)
    at org.apache.ignite.examples.ml.tutorial.Step_11_Boosting.main(Step_11_Boosting.java:117)
Attachments
Issue Links
- relates to
-
IGNITE-12274 [ML] DecisionTree works incorrectly if maxDeep > amount of features
- Open
- links to