Details
Description
The event notification logs stored in the Hive metastore hold JSON-serialized messages in the NOTIFICATION_LOG table; these messages also contain the serialized Thrift API objects. When doing a REPL DUMP, however, we are serializing the metadata for the replication event + the event message + the additional helper-method getters representing the Thrift objects.
We should only serialize the metadata for the replication event + the event message.
For example, for CREATE TABLE the dump currently contains:
{ "eventType": "CREATE_TABLE", "server": "", "servicePrincipal": "", "db": "default", "table": "a", "tableObjJson": "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}", "timestamp": 1489552350, "files": [], "tableObj": { "tableName": "a", "dbName": "default", "owner": "anagarwal", "createTime": 1489552350, 
"lastAccessTime": 0, "retention": 0, "sd": { "cols": [ { "name": "name", "type": "string", "comment": null, "setName": true, "setType": true, "setComment": false } ], "location": "file:/tmp/warehouse/a", "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", "compressed": false, "numBuckets": -1, "serdeInfo": { "name": null, "serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "parameters": { "serialization.format": "\n", "field.delim": "\n" }, "setName": false, "parametersSize": 2, "setParameters": true, "setSerializationLib": true }, "bucketCols": [], "sortCols": [], "parameters": {}, "skewedInfo": { "skewedColNames": [], "skewedColValues": [], "skewedColValueLocationMaps": {}, "setSkewedColNames": true, "setSkewedColValues": true, "setSkewedColValueLocationMaps": true, "skewedColNamesSize": 0, "skewedColNamesIterator": [], "skewedColValuesSize": 0, "skewedColValuesIterator": [], "skewedColValueLocationMapsSize": 0 }, "storedAsSubDirectories": false, "setSkewedInfo": true, "parametersSize": 0, "colsSize": 1, "setParameters": true, "setLocation": true, "setInputFormat": true, "setCols": true, "setOutputFormat": true, "setSerdeInfo": true, "setBucketCols": true, "setSortCols": true, "colsIterator": [ { "name": "name", "type": "string", "comment": null, "setName": true, "setType": true, "setComment": false } ], "bucketColsSize": 0, "bucketColsIterator": [], "sortColsSize": 0, "sortColsIterator": [], "setStoredAsSubDirectories": true, "setCompressed": true, "setNumBuckets": true }, "partitionKeys": [], "parameters": { "totalSize": "0", "EXTERNAL": "TRUE", "numRows": "0", "rawDataSize": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", "numFiles": "0", "transient_lastDdlTime": "1489552350" }, "viewOriginalText": null, "viewExpandedText": null, "tableType": "EXTERNAL_TABLE", "privileges": { "userPrivileges": { "anagarwal": [ { "privilege": "INSERT", 
"createTime": -1, "grantor": "anagarwal", "grantorType": "USER", "grantOption": true, "setCreateTime": true, "setGrantOption": true, "setPrivilege": true, "setGrantor": true, "setGrantorType": true }, { "privilege": "SELECT", "createTime": -1, "grantor": "anagarwal", "grantorType": "USER", "grantOption": true, "setCreateTime": true, "setGrantOption": true, "setPrivilege": true, "setGrantor": true, "setGrantorType": true }, { "privilege": "UPDATE", "createTime": -1, "grantor": "anagarwal", "grantorType": "USER", "grantOption": true, "setCreateTime": true, "setGrantOption": true, "setPrivilege": true, "setGrantor": true, "setGrantorType": true }, { "privilege": "DELETE", "createTime": -1, "grantor": "anagarwal", "grantorType": "USER", "grantOption": true, "setCreateTime": true, "setGrantOption": true, "setPrivilege": true, "setGrantor": true, "setGrantorType": true } ] }, "groupPrivileges": null, "rolePrivileges": null, "rolePrivilegesSize": 0, "setUserPrivileges": true, "setGroupPrivileges": false, "setRolePrivileges": false, "userPrivilegesSize": 1, "groupPrivilegesSize": 0 }, "temporary": false, "rewriteEnabled": false, "setTableName": true, "setDbName": true, "setOwner": true, "setViewOriginalText": false, "setViewExpandedText": false, "setTableType": true, "setPrivileges": true, "setCreateTime": true, "setLastAccessTime": true, "setRetention": true, "partitionKeysIterator": [], "parametersSize": 7, "setTemporary": true, "setRewriteEnabled": false, "setParameters": true, "setPartitionKeys": true, "setSd": true, "partitionKeysSize": 0 } }
It should contain only the required JSON message, as follows:
{ "eventType": "CREATE_TABLE", "server": "", "servicePrincipal": "", "db": "default", "table": "a", "tableObjJson": "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}", "timestamp": 1489552350, "files": [] }
This will require configuring the serialization features of the mapper in use so that it serializes only the annotated fields.
Attachments
Attachments
Issue Links
- links to