Uploaded image for project: 'Hive'
  1. Hive
  2. HIVE-17098

Race condition in Hbase tables

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Closed
    • Critical
    • Resolution: Fixed
    • 2.1.1
    • 3.0.0
    • HBase Handler
    • None

    Description

      These steps simulate our customer production env.

      STEP 1. Create test tables

      CREATE TABLE for_loading(
        key int, 
        value string,
        age int,
        salary decimal (10,2)
      ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
      

      Table test_1 belongs to user testuser1.

      CREATE TABLE test_1(
        key int, 
        value string,
        age int,
        salary decimal (10,2)
      )
      ROW FORMAT SERDE 
        'org.apache.hadoop.hive.hbase.HBaseSerDe' 
      STORED BY 
        'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 
      WITH SERDEPROPERTIES ( 
        'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', 
        'serialization.format'='1')
      TBLPROPERTIES (
        'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', 
        'hbase.table.name'='test_1', 
        'numFiles'='0', 
        'numRows'='0', 
        'rawDataSize'='0', 
        'totalSize'='0', 
        'transient_lastDdlTime'='1495769316');
      

      Table test_2 belongs to user testuser2.

      CREATE TABLE test_2(
        key int, 
        value string,
        age int,
        salary decimal (10,2)
      )
      ROW FORMAT SERDE 
        'org.apache.hadoop.hive.hbase.HBaseSerDe' 
      STORED BY 
        'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 
      WITH SERDEPROPERTIES ( 
        'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', 
        'serialization.format'='1')
      TBLPROPERTIES (
        'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', 
        'hbase.table.name'='test_2', 
        'numFiles'='0', 
        'numRows'='0', 
        'rawDataSize'='0', 
        'totalSize'='0', 
        'transient_lastDdlTime'='1495769316');
      

      STEP 2. Create test data

      import java.io.IOException;
      import java.math.BigDecimal;
      import java.nio.charset.Charset;
      import java.nio.file.Files;
      import java.nio.file.Path;
      import java.nio.file.Paths;
      import java.nio.file.StandardOpenOption;
      import java.util.ArrayList;
      import java.util.Arrays;
      import java.util.List;
      import java.util.Random;
      
      import static java.lang.String.format;
      
      /**
       * Generates a CSV dataset of the form {@code key,name,age,salary} for loading
       * into the Hive tables used by this reproduction (see STEP 1).
       *
       * <p>Usage: {@code java Generator <rowCount> <outputFile>}
       */
      public class Generator {
          // Candidate values for the "value" (name) column.
          private static final List<String> NAMES = Arrays.asList("Brian", "John", "Rodger", "Max", "Freddie", "Albert", "Fedor", "Lev", "Niccolo");
      
          public static void main(String[] args) {
              generateData(Integer.parseInt(args[0]), args[1]);
          }
      
          /**
           * Writes {@code rowNumber} random CSV rows to {@code file}, creating the
           * file if it does not exist and appending if it does.
           *
           * @param rowNumber number of rows to generate (keys are 1..rowNumber)
           * @param file      path of the output CSV file
           * @throws UncheckedIOException if the file cannot be written
           */
          public static void generateData(int rowNumber, String file) {
      
              double maxValue = 20000.55;
              double minValue = 1000.03;
      
              // Local accumulator: a static list would grow across repeated calls
              // and duplicate previously generated rows.
              List<String> lines = new ArrayList<>(rowNumber);
      
              Random random = new Random();
              for (int i = 1; i <= rowNumber; i++) {
                  lines.add(
                      i + "," +
                          NAMES.get(random.nextInt(NAMES.size())) + "," +
                          (random.nextInt(62) + 18) + "," +
                          // Locale.ROOT guarantees a '.' decimal separator; the default
                          // locale could emit ',' and corrupt the CSV column layout.
                          format(Locale.ROOT, "%.2f", (minValue + (maxValue - minValue) * random.nextDouble())));
              }
      
              Path path = Paths.get(file);
      
              try {
                  // CREATE is required alongside APPEND: APPEND alone throws
                  // NoSuchFileException when the output file does not exist yet.
                  Files.write(path, lines, StandardCharsets.UTF_8,
                      StandardOpenOption.CREATE, StandardOpenOption.APPEND);
              } catch (IOException e) {
                  // Fail loudly instead of printing and exiting "successfully".
                  throw new UncheckedIOException("Failed to write dataset to " + file, e);
              }
          }
      }
      
      javac Generator.java
      java Generator 3000000 dataset.csv
      hadoop fs -put dataset.csv /
      

      STEP 3. Upload test data

      load data local inpath '/home/myuser/dataset.csv' into table for_loading;
      
      from for_loading
      insert into table test_1
      select key,value,age,salary;
      
      from for_loading
      insert into table test_2
      select key,value,age,salary;
      

      STEP 4. Run test queries

      Run in 5 parallel terminals for table test_1

      for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default testuser1" -e "select * from test_1 limit 10;" 1>/dev/null; done
      

      Run in 5 parallel terminals for table test_2

      for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default testuser2" -e "select * from test_2 limit 10;" 1>/dev/null; done
      

      EXPECTED RESULT:

      All queries are OK.

      ACTUAL RESULT:

      org.apache.hive.service.cli.HiveSQLException: java.io.IOException: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method
              at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:484)
              at org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:308)
              at org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:847)
              at sun.reflect.GeneratedMethodAccessor8.invoke(Unknown Source)
              at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
              at java.lang.reflect.Method.invoke(Method.java:606)
              at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
              at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
              at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
              at java.security.AccessController.doPrivileged(Native Method)
              at javax.security.auth.Subject.doAs(Subject.java:415)
              at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
              at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
              at com.sun.proxy.$Proxy25.fetchResults(Unknown Source)
              at org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:504)
              at org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:698)
              at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1717)
              at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1702)
              at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
              at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
              at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
              at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
              at java.lang.Thread.run(Thread.java:745)
      Caused by: java.io.IOException: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method
              at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521)
              at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428)
              at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146)
              at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2099)
              at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:479)
              ... 24 more
      Caused by: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method
              at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getRegionLocator(TableInputFormatBase.java:579)
              at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getStartEndKeys(TableInputFormatBase.java:225)
              at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:261)
              at org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplitsInternal(HiveHBaseTableInputFormat.java:525)
              at org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplits(HiveHBaseTableInputFormat.java:452)
              at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:372)
              at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:304)
              at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459)
              ... 28 more
      

      Attachments

        1. HIVE-17098.1.patch
          5 kB
          Oleksiy Sayankin
        2. HIVE-17098.2.patch
          15 kB
          Oleksiy Sayankin

        Issue Links

          Activity

            People

              osayankin Oleksiy Sayankin
              osayankin Oleksiy Sayankin
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: