Uploaded image for project: 'CarbonData'
  1. CarbonData
  2. CARBONDATA-3987

Issues in SDK Pagination reader (2 issues)

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Minor
    • Resolution: Fixed
    • 2.1.0
    • 2.1.1
    • other
    • None

    Description

      Issue 1 :
      Write data to a table and then insert one more row into it. getTotalRows() is incremented by 1, but an error is thrown when trying to read the newly added row.

      Test code-
      /**

      • Carbon Files are written using CarbonWriter in outputpath
        *
      • Carbon Files are read using paginationCarbonReader object
      • Checking pagination with insert on large data with 8 split
        */
        @Test
        public void testSDKPaginationInsertData() throws IOException, InvalidLoadOptionException, InterruptedException { System.out.println("___________________________________________" + name.getMethodName() + " TestCase Execution is started________________________________________________"); // // String outputPath1 = getOutputPath(outputDir, name.getMethodName() + "large"); // // long uid = 123456; // TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai")); // writeMultipleCarbonFiles("id int,name string,rank short,salary double,active boolean,dob date,doj timestamp,city string,dept string", getDatas(), outputPath1, uid, null, null); // // System.out.println("Data is written"); List<String[]> data1 = new ArrayList<String[]>(); String[] row1 = \{"1", "AAA", "3", "3444345.66", "true", "1979-12-09", "2011-2-10 1:00:20", "Pune", "IT"}

        ;
        String[] row2 = {"2", "BBB", "2", "543124.66", "false", "1987-2-19", "2017-1-1 12:00:20", "Bangalore", "DATA"};
        String[] row3 = {"3", "CCC", "1", "787878.888", "false", "1982-05-12", "2015-12-1 2:20:20", "Pune", "DATA"};
        String[] row4 = {"4", "DDD", "1", "99999.24", "true", "1981-04-09", "2000-1-15 7:00:20", "Delhi", "MAINS"};
        String[] row5 = {"5", "EEE", "3", "545656.99", "true", "1987-12-09", "2017-11-25 04:00:20", "Delhi", "IT"};

      data1.add(row1);
      data1.add(row2);
      data1.add(row3);
      data1.add(row4);
      data1.add(row5);

      String outputPath1 = getOutputPath(outputDir, name.getMethodName() + "large");

      long uid = 123456;
      TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai"));
      writeMultipleCarbonFiles("id int,name string,rank short,salary double,active boolean,dob date,doj timestamp,city string,dept string", data1, outputPath1, uid, null, null);

      System.out.println("Data is written");

      String hdfsPath1 = moveFiles(outputPath1, outputPath1);
      String datapath1 = hdfsPath1.concat("/" + name.getMethodName() + "large");
      System.out.println("HDFS Data Path is: " + datapath1);

      runSQL("create table " + name.getMethodName() + "large" + " using carbon location '" + datapath1 + "'");
      System.out.println("Table " + name.getMethodName() + " is created Successfully");
      runSQL("select count from " + name.getMethodName() + "large");

      long uid1 = 123;
      String outputPath = getOutputPath(outputDir, name.getMethodName());
      List<String[]> data = new ArrayList<String[]>();
      String[] row = {"222", "Daisy", "3", "334.456", "true", "1956-11-08", "2013-12-10 12:00:20", "Pune", "IT"};
      data.add(row);
      writeData("id int,name string,rank short,salary double,active boolean,dob date,doj timestamp,city string,dept string", data, outputPath, uid, null, null);
      String hdfsPath = moveFiles(outputPath, outputPath);
      String datapath = hdfsPath.concat("/" + name.getMethodName());

      runSQL("create table " + name.getMethodName() + " using carbon location '" + datapath + "'");
      runSQL("select count from " + name.getMethodName());
      System.out.println("---Insert-----");
      runSQL("insert into table " + name.getMethodName() + " select * from " + name.getMethodName() + "large");
      System.out.println("Inserted");
      System.out.println("---------After Insert-------------");
      System.out.println("---Query 1---");
      runSQL("select count from " + name.getMethodName());

      // configure cache size = 4 blocklet
      CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_MAX_PAGINATION_LRU_CACHE_SIZE_IN_MB, "4");

      CarbonReaderBuilder carbonReaderBuilder = CarbonReader.builder(datapath, "_temp").withPaginationSupport().projection(new String[]{"id","name","rank","salary","active","dob","doj","city","dept"});
      PaginationCarbonReader<Object> paginationCarbonReader =
      (PaginationCarbonReader<Object>) carbonReaderBuilder.build();

      File[] dataFiles1 = new File(datapath).listFiles(new FilenameFilter() {
      @Override public boolean accept(File dir, String name)

      { return name.endsWith("carbondata"); }
      });
      String version=CarbonSchemaReader.getVersionDetails(dataFiles1[0].getAbsolutePath());
      System.out.println("version "+version);

      System.out.println("Total no of rows is : "+paginationCarbonReader.getTotalRows() );
      assertTrue(paginationCarbonReader.getTotalRows() == 6);

      Object[] rows=paginationCarbonReader.read(1,6);
      //assertTrue(rows.length==5);
      for (Object rowss : rows) { System.out.println(((Object[]) rowss)[0]); // assertTrue (((Object[]) row)[1].equals(5001)); }

      // close the reader
      paginationCarbonReader.close();

      }

       

      Issue 2 : When filter() is used to select only certain rows, getTotalRows() still reports the previous (unfiltered) total number of rows, and an error is thrown when trying to read all of those rows.

      /**
      * Carbon Files are written using CarbonWriter in outputpath
      *
      * Carbon Files are read using paginationCarbonReader object with filter
      */
      @Test
      public void testSDKPaginationFilter() throws IOException, InvalidLoadOptionException, InterruptedException { System.out.println("___________________________________________" + name.getMethodName() + " TestCase Execution is started________________________________________________"); List<String []> data =new ArrayList<String []>(); String [] row1= \{"100","MNO","A","1001"};
      String [] row2= {"100","MNOP","C","3001"};
      String [] row3= {"100","MNOQ","X","2001"};
      String [] row4= {"100","MNOR","Z","7001"};
      String [] row5= {"100","MNOS","P","5001"};
      data.add(row1);
      data.add(row2);
      data.add(row3);
      data.add(row4);
      data.add(row5);

      String outputPath=getOutputPath(outputDir,name.getMethodName());
      boolean isTransactionalTable=false;
      long uid=System.currentTimeMillis();
      String blockletsize= String.valueOf(2);
      String blocksize= String.valueOf(4);
      String [] sortColumns={"c4","c3"};
      writeData("c1 int,c2 string,c3 string,c4 int",data,outputPath, uid,blocksize,blockletsize,sortColumns);
      System.out.println("Data is written");

      String hdfsPath = moveFiles(outputPath, outputPath);
      String dataPath = hdfsPath.concat("/" + name.getMethodName());
      System.out.println("HDFS Data Path is: " + dataPath);

      // configure cache size = 4 blocklet
      CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_MAX_PAGINATION_LRU_CACHE_SIZE_IN_MB, "4");

      //filter expression
      EqualToExpression equalExpression =
      new EqualToExpression(new ColumnExpression("c3", DataTypes.STRING),
      new LiteralExpression("P", DataTypes.STRING));

      CarbonReaderBuilder carbonReaderBuilder = CarbonReader.builder(dataPath, "_temp").withPaginationSupport().projection(new String[]{"c2", "c4"}).filter(equalExpression);
      PaginationCarbonReader<Object> paginationCarbonReader =
      (PaginationCarbonReader<Object>) carbonReaderBuilder.build();


      File[] dataFiles1 = new File(dataPath).listFiles(new FilenameFilter() {
      @Override public boolean accept(File dir, String name) { return name.endsWith("carbondata"); }

      });
      String version=CarbonSchemaReader.getVersionDetails(dataFiles1[0].getAbsolutePath());
      System.out.println("version "+version);

      System.out.println("Total no of rows is : "+paginationCarbonReader.getTotalRows() );
      assertTrue(paginationCarbonReader.getTotalRows() == 5);

      Object[] rows=paginationCarbonReader.read(1,5);
      for (Object row : rows)

      { System.out.println(((Object[]) row)[0]); // assertTrue (((Object[]) row)[1].equals(5001)); }

      // close the reader
      paginationCarbonReader.close();
      }

       

      Attachments

        Activity

          People

            Unassigned Unassigned
            chetdb Chetan Bhat
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Time Tracking

                Estimated:
                Original Estimate - Not Specified
                Not Specified
                Remaining:
                Remaining Estimate - 0h
                0h
                Logged:
                Time Spent - 6.5h
                6.5h