Uploaded image for project: 'Hive'
  1. Hive
  2. HIVE-16219

metastore notification_log contains serialized message with non-functional fields

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Closed
    • Major
    • Resolution: Fixed
    • 2.2.0
    • 2.3.0, 3.0.0
    • Metastore
    • None
    • Reviewed

    Description

      The event notification logs stored in the Hive metastore keep JSON-serialized messages in the NOTIFICATION_LOG table, and these messages also carry the serialized Thrift API objects. However, when doing a REPL DUMP we are serializing the metadata for the replication event + the event message + additional helper getter methods of the Thrift objects.

      We should only serialize the metadata for the replication event + the event message.
      For example, for create table:

      {
        "eventType": "CREATE_TABLE",
        "server": "",
        "servicePrincipal": "",
        "db": "default",
        "table": "a",
        "tableObjJson": "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}",
        "timestamp": 1489552350,
        "files": [],
        "tableObj": {
          "tableName": "a",
          "dbName": "default",
          "owner": "anagarwal",
          "createTime": 1489552350,
          "lastAccessTime": 0,
          "retention": 0,
          "sd": {
            "cols": [
              {
                "name": "name",
                "type": "string",
                "comment": null,
                "setName": true,
                "setType": true,
                "setComment": false
              }
            ],
            "location": "file:/tmp/warehouse/a",
            "inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
            "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
            "compressed": false,
            "numBuckets": -1,
            "serdeInfo": {
              "name": null,
              "serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
              "parameters": {
                "serialization.format": "\n",
                "field.delim": "\n"
              },
              "setName": false,
              "parametersSize": 2,
              "setParameters": true,
              "setSerializationLib": true
            },
            "bucketCols": [],
            "sortCols": [],
            "parameters": {},
            "skewedInfo": {
              "skewedColNames": [],
              "skewedColValues": [],
              "skewedColValueLocationMaps": {},
              "setSkewedColNames": true,
              "setSkewedColValues": true,
              "setSkewedColValueLocationMaps": true,
              "skewedColNamesSize": 0,
              "skewedColNamesIterator": [],
              "skewedColValuesSize": 0,
              "skewedColValuesIterator": [],
              "skewedColValueLocationMapsSize": 0
            },
            "storedAsSubDirectories": false,
            "setSkewedInfo": true,
            "parametersSize": 0,
            "colsSize": 1,
            "setParameters": true,
            "setLocation": true,
            "setInputFormat": true,
            "setCols": true,
            "setOutputFormat": true,
            "setSerdeInfo": true,
            "setBucketCols": true,
            "setSortCols": true,
            "colsIterator": [
              {
                "name": "name",
                "type": "string",
                "comment": null,
                "setName": true,
                "setType": true,
                "setComment": false
              }
            ],
            "bucketColsSize": 0,
            "bucketColsIterator": [],
            "sortColsSize": 0,
            "sortColsIterator": [],
            "setStoredAsSubDirectories": true,
            "setCompressed": true,
            "setNumBuckets": true
          },
          "partitionKeys": [],
          "parameters": {
            "totalSize": "0",
            "EXTERNAL": "TRUE",
            "numRows": "0",
            "rawDataSize": "0",
            "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}",
            "numFiles": "0",
            "transient_lastDdlTime": "1489552350"
          },
          "viewOriginalText": null,
          "viewExpandedText": null,
          "tableType": "EXTERNAL_TABLE",
          "privileges": {
            "userPrivileges": {
              "anagarwal": [
                {
                  "privilege": "INSERT",
                  "createTime": -1,
                  "grantor": "anagarwal",
                  "grantorType": "USER",
                  "grantOption": true,
                  "setCreateTime": true,
                  "setGrantOption": true,
                  "setPrivilege": true,
                  "setGrantor": true,
                  "setGrantorType": true
                },
                {
                  "privilege": "SELECT",
                  "createTime": -1,
                  "grantor": "anagarwal",
                  "grantorType": "USER",
                  "grantOption": true,
                  "setCreateTime": true,
                  "setGrantOption": true,
                  "setPrivilege": true,
                  "setGrantor": true,
                  "setGrantorType": true
                },
                {
                  "privilege": "UPDATE",
                  "createTime": -1,
                  "grantor": "anagarwal",
                  "grantorType": "USER",
                  "grantOption": true,
                  "setCreateTime": true,
                  "setGrantOption": true,
                  "setPrivilege": true,
                  "setGrantor": true,
                  "setGrantorType": true
                },
                {
                  "privilege": "DELETE",
                  "createTime": -1,
                  "grantor": "anagarwal",
                  "grantorType": "USER",
                  "grantOption": true,
                  "setCreateTime": true,
                  "setGrantOption": true,
                  "setPrivilege": true,
                  "setGrantor": true,
                  "setGrantorType": true
                }
              ]
            },
            "groupPrivileges": null,
            "rolePrivileges": null,
            "rolePrivilegesSize": 0,
            "setUserPrivileges": true,
            "setGroupPrivileges": false,
            "setRolePrivileges": false,
            "userPrivilegesSize": 1,
            "groupPrivilegesSize": 0
          },
          "temporary": false,
          "rewriteEnabled": false,
          "setTableName": true,
          "setDbName": true,
          "setOwner": true,
          "setViewOriginalText": false,
          "setViewExpandedText": false,
          "setTableType": true,
          "setPrivileges": true,
          "setCreateTime": true,
          "setLastAccessTime": true,
          "setRetention": true,
          "partitionKeysIterator": [],
          "parametersSize": 7,
          "setTemporary": true,
          "setRewriteEnabled": false,
          "setParameters": true,
          "setPartitionKeys": true,
          "setSd": true,
          "partitionKeysSize": 0
        }
      }
      

      It should contain only the required JSON message, as follows:

      {
        "eventType": "CREATE_TABLE",
        "server": "",
        "servicePrincipal": "",
        "db": "default",
        "table": "a",
        "tableObjJson": "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}",
        "timestamp": 1489552350,
        "files": []
      }
      

      This will require configuring serialization features on the mapper in use so that it only serializes the annotated fields.

      Attachments

        1. HIVE-16219.3.patch
          5 kB
          Anishek Agarwal

        Issue Links

          Activity

            People

              anishek Anishek Agarwal
              anishek Anishek Agarwal
              Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: