Details
Description
JsonDecoder maintains state for each record decoded, leading to a memory leak if the same instance is used for multiple inputs. Using JsonDecoder.configure to change the input does not correctly clean up the state stored in JsonDecoder.reorderBuffers, so an unbounded number of ReorderBuffer instances accumulate. If a new JsonDecoder is created for each input there is no memory leak, but creating a fresh instance is significantly more expensive than reusing one.
This problem seems to occur only when the input schema contains a record, which is consistent with reorderBuffers being the source of the leak. From a first look at the JsonDecoder code, the reorderBuffers stack should be empty after a record is fully processed, so some other behavior may be involved as well.
The following is a minimal example which will exhaust a 50MB heap (-Xmx50m) after about 5.25 million iterations. The first section demonstrates that no memory leak is encountered when creating a fresh JsonDecoder instance for each input.
import org.apache.avro.Schema; import org.apache.avro.io.*; import org.apache.avro.generic.*; import java.io.IOException; public class JsonDecoderMemoryLeak { public static DecoderFactory decoderFactory = DecoderFactory.get(); public static JsonDecoder createDecoder(String input, Schema schema) throws IOException { return decoderFactory.jsonDecoder(schema, input); } public static Object decodeAvro(String input, Schema schema, JsonDecoder decoder) throws IOException { if (decoder == null) { decoder = createDecoder(input, schema); } else { decoder.configure(input); } GenericDatumReader reader = new GenericDatumReader<GenericRecord>(schema); return reader.read(null, decoder); } public static Schema.Parser parser = new Schema.Parser(); public static Schema schema = parser.parse("{\"name\": \"TestRecord\", \"type\": \"record\", \"fields\": [{\"name\": \"field1\", \"type\": \"long\"}]}"); public static String record(long i) { StringBuilder builder = new StringBuilder("{\"field1\": "); builder.append(i); builder.append("}"); return builder.toString(); } public static void main(String[] args) throws IOException { // No memory issues when creating a new decoder for each record System.out.println("Running with fresh JsonDecoder instances for 6000000 iterations"); for(long i = 0; i < 6000000; i++) { decodeAvro(record(i), schema, null); } // Runs out of memory after ~5250000 records System.out.println("Running with a single reused JsonDecoder instance"); long count = 0; try { JsonDecoder decoder = createDecoder(record(0), schema); while(true) { decodeAvro(record(count), schema, decoder); count++; } } catch (OutOfMemoryError e) { System.out.println("Out of memory after " + count + " records"); e.printStackTrace(); } } }
$ java -Xmx50m -jar json-decoder-memory-leak.jar Running with fresh JsonDecoder instances for 6000000 iterations Running with a single reused JsonDecoder instance Out of memory after 5242880 records java.lang.OutOfMemoryError: Java heap space at java.util.Arrays.copyOf(Arrays.java:3210) at java.util.Arrays.copyOf(Arrays.java:3181) at java.util.Vector.grow(Vector.java:266) at java.util.Vector.ensureCapacityHelper(Vector.java:246) at java.util.Vector.addElement(Vector.java:620) at java.util.Stack.push(Stack.java:67) at org.apache.avro.io.JsonDecoder.doAction(JsonDecoder.java:487) at org.apache.avro.io.parsing.Parser.advance(Parser.java:88) at org.apache.avro.io.JsonDecoder.advance(JsonDecoder.java:139) at org.apache.avro.io.JsonDecoder.readLong(JsonDecoder.java:178) at org.apache.avro.io.ResolvingDecoder.readLong(ResolvingDecoder.java:162) at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:183) at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:152) at org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:240) at org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:230) at org.apache.avro.generic.GenericDatumReader.readWithoutConversion(GenericDatumReader.java:174) at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:152) at org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:144) at com.spiceworks.App.decodeAvro(App.java:25) at com.spiceworks.App.main(App.java:52)