Uploaded image for project: 'Hive'
  1. Hive
  2. HIVE-19743

Hive does not push the predicate down to HBaseStorageHandler when the Hive column mapped to the HBase row key is declared as varchar

    XMLWordPrintableJSON

    Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Invalid
    • Affects Version/s: 2.1.0
    • Fix Version/s: None
    • Component/s: HBase Handler, Hive
    • Labels:
      None
    • Environment:

      java8,centos7

      Description

      Steps to Reproduce:

      
      //hbase table
      
      create 'mytable', 'cf'
      put 'mytable', 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4', 'cf:message', 'hello world'
      put 'mytable', 'ABCDEF1|GHIJK1|ijj123kl-mn4o-4pq5-678r-st90123u0v41', 'cf:foo', 0x0
      
      // hive table with key stored as varchar
      
      show create table hbase_table_4;
      
      +-----------------------------------------------------------+--+
      
      |                      createtab_stmt                       |
      
      +-----------------------------------------------------------+--+
      
      | CREATE EXTERNAL TABLE `hbase_table_4`(                    |
      
      |   `hbase_key` varchar(80) COMMENT 'from deserializer',    |
      
      |   `value` string COMMENT 'from deserializer',             |
      
      |   `value1` string COMMENT 'from deserializer')            |
      
      | ROW FORMAT SERDE                                          |
      
      |   'org.apache.hadoop.hive.hbase.HBaseSerDe'               |
      
      | STORED BY                                                 |
      
      |   'org.apache.hadoop.hive.hbase.HBaseStorageHandler'      |
      
      | WITH SERDEPROPERTIES (                                    |
      
      |   'hbase.columns.mapping'=':key,cf:foo,cf:message',       |
      
      |   'serialization.format'='1')                             |
      
      | TBLPROPERTIES (                                           |
      
      |   'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}',   |
      
      |   'hbase.table.name'='mytable',                           |
      
      |   'numFiles'='0',                                         |
      
      |   'numRows'='0',                                          |
      
      |   'rawDataSize'='0',                                      |
      
      |   'totalSize'='0',                                        |
      
      |   'transient_lastDdlTime'='1527708430')                   |
      
      +-----------------------------------------------------------+--+
      
       
      
      // hive table with key stored as string
      
      CREATE EXTERNAL TABLE `hbase_table_5`(                    |
      
      |   `hbase_key` string COMMENT 'from deserializer',         |
      
      |   `value` string COMMENT 'from deserializer',             |
      
      |   `value1` string COMMENT 'from deserializer')            |
      
      | ROW FORMAT SERDE                                          |
      
      |   'org.apache.hadoop.hive.hbase.HBaseSerDe'               |
      
      | STORED BY                                                 |
      
      |   'org.apache.hadoop.hive.hbase.HBaseStorageHandler'      |
      
      | WITH SERDEPROPERTIES (                                    |
      
      |   'hbase.columns.mapping'=':key,cf:foo,cf:message',       |
      
      |   'serialization.format'='1')                             |
      
      | TBLPROPERTIES (                                           |
      
      |   'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}',   |
      
      |   'hbase.table.name'='mytable',                           |
      
      |   'numFiles'='0',                                         |
      
      |   'numRows'='0',                                          |
      
      |   'rawDataSize'='0',                                      |
      
      |   'totalSize'='0',                                        |
      
      |   'transient_lastDdlTime'='1527708520')                   |
      
       
      
      Explain plan on the table with the key stored as varchar
      
       explain select * from hbase_table_4 where hbase_key='ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4'
      
       Stage-0                                                                                          |
      
      |   Fetch Operator                                                                                 |
      
      |     limit:-1                                                                                     |
      
      |     Select Operator [SEL_2]                                                                      |
      
      |       Output:["_col0","_col1","_col2"]                                                           |
      
      |       Filter Operator [FIL_4]                                                                    |
      
      |         predicate:(UDFToString(hbase_key) = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')  |
      
      |         TableScan [TS_0]                                                                         |
      
      |           Output:["hbase_key","value","value1"] 
      
       
      
      Explain plan on the table with the key stored as string
      
      explain select * from hbase_table_5 where hbase_key='ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4';
      
       Plan optimized by CBO.                  |
      
      |                                         |
      
      | Stage-0                                 |
      
      |   Fetch Operator                        |
      
      |     limit:-1                            |
      
      |     Select Operator [SEL_2]             |
      
      |       Output:["_col0","_col1","_col2"]  |
      
      |       TableScan [TS_0]                  |
      
      |         Output:["value","value1"] 
      
      

       

      The predicate is pushed down correctly on the table whose HBase row key is stored as string:

      
      2018-05-30 21:26:45,418 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(972)) - Original predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')
      
      2018-05-30 21:26:45,418 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(975)) - Pushed predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')
      
      2018-05-30 21:26:45,418 DEBUG [main]: ppd.PredicatePushDown (PredicatePushDown.java:transform(138)) - After PPD:
      
      TS[0]-SEL[2]-FS[3]
      
      

       When the row key is stored as varchar, the predicate is not pushed down to the storage handler and falls back to a residual predicate:

      
      2018-05-30 21:29:29,303 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(972)) - Original predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')
      
      2018-05-30 21:29:29,303 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(980)) - Residual predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')
      
      2018-05-30 21:29:29,303 DEBUG [main]: ppd.PredicatePushDown (PredicatePushDown.java:transform(138)) - After PPD:
      
      TS[0]-FIL[4]-SEL[2]-FS[3]
      
      

       

       

       

       

       

        Attachments

          Activity

            People

            • Assignee:
              Rajkumar Singh Rajkumar Singh
              Reporter:
              Rajkumar Singh Rajkumar Singh
            • Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

              • Created:
                Updated:
                Resolved: