Uploaded image for project: 'Apache MADlib'
  1. Apache MADlib
  2. MADLIB-1322

MLP with minibatch fails for integer dependent variable on PostgreSQL

    XML | Word | Printable | JSON

Details

    • Bug
    • Status: Closed
    • Minor
    • Resolution: Won't Fix
    • None
    • v1.16
    • None

    Description

      Fails on PostgreSQL 9.6. As far as I know, it does not fail on Greenplum.

      -- Reproduction fixture: drop and recreate iris_data so the script can be re-run.
      DROP TABLE IF EXISTS iris_data;
      CREATE TABLE iris_data(
          id serial,               -- row identifier (explicit values are supplied by the INSERT)
          attributes numeric[],    -- independent variables; four values per row in the sample data
          class_text varchar,      -- class label as text ('Iris_setosa' / 'Iris_versicolor')
          class integer,           -- same class label as an integer (1 / 2)
          state varchar            -- 'Alaska' / 'Tennessee' in the sample data
      );
      -- Sample data: 52 rows. class 1 = 'Iris_setosa', class 2 = 'Iris_versicolor'.
      -- ids 1-27 have state 'Alaska'; ids 28-52 have state 'Tennessee'.
      INSERT INTO iris_data(id, attributes, class_text, class, state) VALUES
      (1,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'Alaska'),
      (2,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'Alaska'),
      (3,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Alaska'),
      (4,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'Alaska'),
      (5,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'Alaska'),
      (6,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'Alaska'),
      (7,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'Alaska'),
      (8,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'Alaska'),
      (9,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'Alaska'),
      (10,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'Alaska'),
      (11,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'Alaska'),
      (12,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'Alaska'),
      (13,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'Alaska'),
      (14,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'Alaska'),
      (15,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'Alaska'),
      (16,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'Alaska'),
      (17,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'Alaska'),
      (18,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'Alaska'),
      (19,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'Alaska'),
      (20,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'Alaska'),
      (21,ARRAY[6.1,2.9,4.7,1.4],'Iris_versicolor',2,'Alaska'),
      (22,ARRAY[5.6,2.9,3.6,1.3],'Iris_versicolor',2,'Alaska'),
      (23,ARRAY[6.7,3.1,4.4,1.4],'Iris_versicolor',2,'Alaska'),
      (24,ARRAY[5.6,3.0,4.5,1.5],'Iris_versicolor',2,'Alaska'),
      (25,ARRAY[5.8,2.7,4.1,1.0],'Iris_versicolor',2,'Alaska'),
      (26,ARRAY[6.2,2.2,4.5,1.5],'Iris_versicolor',2,'Alaska'),
      (27,ARRAY[5.6,2.5,3.9,1.1],'Iris_versicolor',2,'Alaska'),
      (28,ARRAY[5.0,3.4,1.5,0.2],'Iris_setosa',1,'Tennessee'),
      (29,ARRAY[4.4,2.9,1.4,0.2],'Iris_setosa',1,'Tennessee'),
      (30,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Tennessee'),
      (31,ARRAY[5.4,3.7,1.5,0.2],'Iris_setosa',1,'Tennessee'),
      (32,ARRAY[4.8,3.4,1.6,0.2],'Iris_setosa',1,'Tennessee'),
      (33,ARRAY[4.8,3.0,1.4,0.1],'Iris_setosa',1,'Tennessee'),
      (34,ARRAY[4.3,3.0,1.1,0.1],'Iris_setosa',1,'Tennessee'),
      (35,ARRAY[5.8,4.0,1.2,0.2],'Iris_setosa',1,'Tennessee'),
      (36,ARRAY[5.7,4.4,1.5,0.4],'Iris_setosa',1,'Tennessee'),
      (37,ARRAY[5.4,3.9,1.3,0.4],'Iris_setosa',1,'Tennessee'),
      (38,ARRAY[6.0,2.9,4.5,1.5],'Iris_versicolor',2,'Tennessee'),
      (39,ARRAY[5.7,2.6,3.5,1.0],'Iris_versicolor',2,'Tennessee'),
      (40,ARRAY[5.5,2.4,3.8,1.1],'Iris_versicolor',2,'Tennessee'),
      (41,ARRAY[5.5,2.4,3.7,1.0],'Iris_versicolor',2,'Tennessee'),
      (42,ARRAY[5.8,2.7,3.9,1.2],'Iris_versicolor',2,'Tennessee'),
      (43,ARRAY[6.0,2.7,5.1,1.6],'Iris_versicolor',2,'Tennessee'),
      (44,ARRAY[5.4,3.0,4.5,1.5],'Iris_versicolor',2,'Tennessee'),
      (45,ARRAY[6.0,3.4,4.5,1.6],'Iris_versicolor',2,'Tennessee'),
      (46,ARRAY[6.7,3.1,4.7,1.5],'Iris_versicolor',2,'Tennessee'),
      (47,ARRAY[6.3,2.3,4.4,1.3],'Iris_versicolor',2,'Tennessee'),
      (48,ARRAY[5.6,3.0,4.1,1.3],'Iris_versicolor',2,'Tennessee'),
      (49,ARRAY[5.5,2.5,4.0,1.3],'Iris_versicolor',2,'Tennessee'),
      (50,ARRAY[5.5,2.6,4.4,1.2],'Iris_versicolor',2,'Tennessee'),
      (51,ARRAY[6.1,3.0,4.6,1.4],'Iris_versicolor',2,'Tennessee'),
      (52,ARRAY[5.8,2.6,4.0,1.2],'Iris_versicolor',2,'Tennessee');
      

      Works OK if dependent variable is TEXT:

      -- Working case: pack iris_data for mini-batching using the TEXT class column.
      -- Only the first four arguments are passed; the rest are left at their defaults.
      DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;
      SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table
                                           'iris_data_packed',  -- Output table
                                           'class_text',        -- Dependent variable TEXT
                                           'attributes'        -- Independent variables
                                          );
      
      \d+ iris_data_packed
                                     Table "public.iris_data_packed"
             Column        |        Type        | Modifiers | Storage  | Stats target | Description 
      ---------------------+--------------------+-----------+----------+--------------+-------------
       __id__              | bigint             |           | plain    |              | 
       dependent_varname   | double precision[] |           | extended |              | 
       independent_varname | double precision[] |           | extended |              | 
      
      
      -- Working case: train an MLP classifier on the packed table built from class_text.
      DROP TABLE IF EXISTS mlp_model, mlp_model_summary, mlp_model_standardization;
      -- Set seed so results are reproducible
      SELECT setseed(0);
      SELECT madlib.mlp_classification(
          'iris_data_packed',      -- Output table from mini-batch preprocessor
          'mlp_model',             -- Destination table
          'independent_varname',   -- Hardcode to this, from table iris_data_packed
          'dependent_varname',     -- Hardcode to this, from table iris_data_packed
          ARRAY[5],                -- Number of units per layer
          'learning_rate_init=0.1,
          n_iterations=5,
          tolerance=0',            -- Optimizer params
          'tanh',                  -- Activation function
          NULL,                    -- Default weight (1)
          FALSE,                   -- No warm start
          TRUE                    -- Verbose (per-iteration loss is printed as INFO messages)
      );
      
      INFO:  Iteration: 1, Loss: <0.990848103579>
      INFO:  Iteration: 2, Loss: <0.852423978558>
      INFO:  Iteration: 3, Loss: <0.689764103374>
      INFO:  Iteration: 4, Loss: <0.530458765792>
       mlp_classification 
      --------------------
       
      (1 row)
      

      Fails with a TypeError in mlp_classification if the dependent variable is INTEGER:

      -- Failing case: pack iris_data again, this time using the INTEGER class column
      -- and asking the preprocessor to encode it (last argument TRUE).
      DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;
      SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table
                                           'iris_data_packed',  -- Output table
                                           'class',        -- Dependent variable INTEGER
                                           'attributes',        -- Independent variables
                                           NULL, -- grouping
                                           NULL, -- buffer size (or size of the mini-batch)
                                           TRUE -- Encode scalar int dependent variable
                                           );
      
      \d+ iris_data_packed
                                     Table "public.iris_data_packed"
             Column        |        Type        | Modifiers | Storage  | Stats target | Description 
      ---------------------+--------------------+-----------+----------+--------------+-------------
       __id__              | bigint             |           | plain    |              | 
       dependent_varname   | double precision[] |           | extended |              | 
       independent_varname | double precision[] |           | extended |              | 
      
      
      -- Failing case: same training call, now on the packed table built from the
      -- INTEGER class column. This is the call that raises
      -- "TypeError: must be string, not int" on PostgreSQL 9.6.
      DROP TABLE IF EXISTS mlp_model, mlp_model_summary, mlp_model_standardization;
      -- Set seed so results are reproducible
      SELECT setseed(0);
      SELECT madlib.mlp_classification(
          'iris_data_packed',      -- Output table from mini-batch preprocessor
          'mlp_model',             -- Destination table
          'independent_varname',   -- Hardcode to this, from table iris_data_packed
          'dependent_varname',     -- Hardcode to this, from table iris_data_packed
          ARRAY[5],                -- Number of units per layer
          'learning_rate_init=0.1,
          n_iterations=10,
          tolerance=0',            -- Optimizer params
          'tanh',                  -- Activation function
          NULL,                    -- Default weight (1)
          FALSE,                   -- No warm start
          TRUE                    -- Verbose
      );
      
      ERROR:  TypeError: must be string, not int
      CONTEXT:  Traceback (most recent call last):
        PL/Python function "mlp_classification", line 33, in <module>
          grouping_col)
        PL/Python function "mlp_classification", line 42, in wrapper
        PL/Python function "mlp_classification", line 147, in mlp
        PL/Python function "mlp_classification", line 74, in quote_literal
      PL/Python function "mlp_classification"
      

      Attachments

        Activity

          People

            Unassigned Unassigned
            fmcquillan Frank McQuillan
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: